25#ifndef _GLIBCXX_SIMD_LOADSTORE_H
26#define _GLIBCXX_SIMD_LOADSTORE_H 1
29#pragma GCC system_header
32#if __cplusplus >= 202400L
37#pragma GCC diagnostic push
38#pragma GCC diagnostic ignored "-Wpsabi"
41namespace std _GLIBCXX_VISIBILITY(default)
43_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Maps the requested load result type _Vp to the type a load returns.
// Primary template: an explicitly requested _Vp is used unchanged; _Tp is
// not consulted here.
template <typename _Vp, typename _Tp>
  struct __vec_load_return
  { using type = _Vp; };
50 template <
typename _Tp>
51 struct __vec_load_return<void, _Tp>
52 {
using type = basic_vec<_Tp>; };
54 template <
typename _Vp,
typename _Tp>
55 using __vec_load_return_t =
typename __vec_load_return<_Vp, _Tp>::type;
57 template <
typename _Vp,
typename _Tp>
58 using __load_mask_type_t =
typename __vec_load_return_t<_Vp, _Tp>::mask_type;
60 template <
typename _Tp>
61 concept __sized_contiguous_range
62 = ranges::contiguous_range<_Tp> && ranges::sized_range<_Tp>;
// Loads a vec from the sized contiguous range __r.
//
// _Vp selects the result type through __vec_load_return_t (void yields a
// basic_vec of the range's value type); __f carries the load/store flags.
// Unless __f contains __allow_partial_loadstore, the range must hold at least
// _RV::size() elements: a static_assert covers statically sized ranges and
// __glibcxx_simd_precondition covers the remaining cases.
//
// NOTE(review): the function's braces and the keywords that separate the
// element-wise path from the pointer-based path are not present in this text
// -- compare with the upstream header before changing any logic here.
64 template <
typename _Vp = void, __sized_contiguous_range _Rg,
typename... _Flags>
65 [[__gnu__::__always_inline__]]
66 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
67 unchecked_load(_Rg&& __r, flags<_Flags...> __f = {})
69 using _Tp = ranges::range_value_t<_Rg>;
70 using _RV = __vec_load_return_t<_Vp, _Tp>;
71 using _Rp =
typename _RV::value_type;
// Conversions that are not value-preserving must be requested via a flag.
72 static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>, _Rp, _Flags...>,
73 "'flag_convert' must be used for conversions that are not value-preserving");
// When set, the size checks below are skipped.
75 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
76 constexpr size_t __static_size = __static_range_size(__r);
78 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
79 static_assert(ranges::size(__r) >= _RV::size(),
"given range must have sufficient size");
// Data pointer of the range after the flags' pointer adjustment
// (presumably alignment related -- confirm in _S_adjust_pointer).
81 const auto* __ptr = __f.template _S_adjust_pointer<_RV>(ranges::data(__r));
82 const auto __rg_size = std::ranges::size(__r);
83 if constexpr (!__allow_out_of_bounds)
84 __glibcxx_simd_precondition(
85 std::ranges::size(__r) >= _RV::size(),
86 "Input range is too small. Did you mean to use 'partial_load'?");
// Element-wise path: builds the result from a generator that converts
// __r[__i] to _Rp.
// NOTE(review): the end of this lambda and any bounds handling inside it
// are not visible here.
90 return _RV([&](
size_t __i) -> _Rp {
94 return static_cast<_Rp
>(__r[__i]);
// Pointer path: constructs directly from __ptr when the static size is known
// to suffice or out-of-bounds is not allowed; the _S_partial_load call below
// receives the runtime size instead (its guarding `else` is not visible).
99 if constexpr ((__static_size != dynamic_extent && __static_size >= size_t(_RV::size()))
100 || !__allow_out_of_bounds)
101 return _RV(_LoadCtorTag(), __ptr);
103 return _RV::_S_partial_load(__ptr, __rg_size);
// Masked variant of the range load: loads a vec from the sized contiguous
// range __r under control of __mask.
//
// _Vp selects the result type through __vec_load_return_t; __mask has the
// mask_type of that result type; __f carries the load/store flags. Unless __f
// contains __allow_partial_loadstore, the range must hold at least
// _RV::size() elements (static_assert for statically sized ranges,
// __glibcxx_simd_precondition otherwise).
//
// NOTE(review): the function's braces, several `else` lines and the values
// returned for masked-off / out-of-range elements are not present in this
// text -- compare with the upstream header before changing any logic here.
107 template <
typename _Vp = void, __sized_contiguous_range _Rg,
typename... _Flags>
108 [[__gnu__::__always_inline__]]
109 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
110 unchecked_load(_Rg&& __r,
const __load_mask_type_t<_Vp, ranges::range_value_t<_Rg>>& __mask,
111 flags<_Flags...> __f = {})
113 using _Tp = ranges::range_value_t<_Rg>;
114 using _RV = __vec_load_return_t<_Vp, _Tp>;
115 using _Rp =
typename _RV::value_type;
116 static_assert(__vectorizable<_Tp>);
117 static_assert(__explicitly_convertible_to<_Tp, _Rp>);
// Conversions that are not value-preserving must be requested via a flag.
118 static_assert(__loadstore_convertible_to<_Tp, _Rp, _Flags...>,
119 "'flag_convert' must be used for conversions that are not value-preserving");
// When set, the size checks below are skipped.
121 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
122 constexpr auto __static_size = __static_range_size(__r);
124 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
125 static_assert(ranges::size(__r) >= _RV::size(),
"given range must have sufficient size");
// Data pointer of the range after the flags' pointer adjustment
// (presumably alignment related -- confirm in _S_adjust_pointer).
127 const auto* __ptr = __f.template _S_adjust_pointer<_RV>(ranges::data(__r));
129 if constexpr (!__allow_out_of_bounds)
130 __glibcxx_simd_precondition(
131 ranges::size(__r) >=
size_t(_RV::size()),
132 "Input range is too small. Did you mean to use 'partial_load'?");
134 const size_t __rg_size = ranges::size(__r);
// Element-wise path: the generator treats indices beyond the range or with a
// false mask element separately and otherwise converts __r[__i] to _Rp.
// NOTE(review): the statement executed for the skipped case is not visible.
137 return _RV([&](
size_t __i) -> _Rp {
138 if (__i >= __rg_size || !__mask[
int(__i)])
141 return static_cast<_Rp
>(__r[__i]);
// No runtime size check is needed when out-of-bounds is not allowed (already
// checked above) or the static size is known to be sufficient.
146 constexpr bool __no_size_check
147 = !__allow_out_of_bounds
148 || (__static_size != dynamic_extent
149 && __static_size >= size_t(_RV::size.value));
// Single-element vec: load only if the mask is set and an element exists.
// NOTE(review): the alternative operand of this conditional is not visible.
150 if constexpr (_RV::size() == 1)
151 return __mask[0] && (__no_size_check || __rg_size > 0) ? _RV(_LoadCtorTag(), __ptr)
153 else if constexpr (__no_size_check)
154 return _RV::_S_masked_load(__ptr, __mask);
155 else if (__rg_size >=
size_t(_RV::size()))
156 return _RV::_S_masked_load(__ptr, __mask);
// Short, non-empty range: the mask is combined with
// _S_partial_mask_of_n(__rg_size) before the masked load.
157 else if (__rg_size > 0)
158 return _RV::_S_masked_load(
159 __ptr, __mask && _RV::mask_type::_S_partial_mask_of_n(
int(__rg_size)));
165 template <
typename _Vp = void, contiguous_iterator _It,
typename... _Flags>
166 [[__gnu__::__always_inline__]]
167 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
168 unchecked_load(_It __first, iter_difference_t<_It> __n, flags<_Flags...> __f = {})
169 {
return simd::unchecked_load<_Vp>(span<
const iter_value_t<_It>>(__first, __n), __f); }
171 template <
typename _Vp = void, contiguous_iterator _It,
typename... _Flags>
172 [[__gnu__::__always_inline__]]
173 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
174 unchecked_load(_It __first, iter_difference_t<_It> __n,
175 const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
176 flags<_Flags...> __f = {})
177 {
return simd::unchecked_load<_Vp>(span<
const iter_value_t<_It>>(__first, __n), __mask, __f); }
179 template <
typename _Vp =
void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
181 [[__gnu__::__always_inline__]]
182 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
183 unchecked_load(_It __first, _Sp __last, flags<_Flags...> __f = {})
184 {
return simd::unchecked_load<_Vp>(span<
const iter_value_t<_It>>(__first, __last), __f); }
186 template <
typename _Vp =
void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
188 [[__gnu__::__always_inline__]]
189 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
190 unchecked_load(_It __first, _Sp __last,
191 const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
192 flags<_Flags...> __f = {})
194 return simd::unchecked_load<_Vp>(span<
const iter_value_t<_It>>(__first, __last), __mask, __f);
197 template <
typename _Vp = void, __sized_contiguous_range _Rg,
typename... _Flags>
198 [[__gnu__::__always_inline__]]
199 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
200 partial_load(_Rg&& __r, flags<_Flags...> __f = {})
201 {
return simd::unchecked_load<_Vp>(__r, __f | __allow_partial_loadstore); }
203 template <
typename _Vp = void, __sized_contiguous_range _Rg,
typename... _Flags>
204 [[__gnu__::__always_inline__]]
205 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
206 partial_load(_Rg&& __r,
const __load_mask_type_t<_Vp, ranges::range_value_t<_Rg>>& __mask,
207 flags<_Flags...> __f = {})
208 {
return simd::unchecked_load<_Vp>(__r, __mask, __f | __allow_partial_loadstore); }
210 template <
typename _Vp = void, contiguous_iterator _It,
typename... _Flags>
211 [[__gnu__::__always_inline__]]
212 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
213 partial_load(_It __first, iter_difference_t<_It> __n, flags<_Flags...> __f = {})
214 {
return partial_load<_Vp>(span<
const iter_value_t<_It>>(__first, __n), __f); }
216 template <
typename _Vp = void, contiguous_iterator _It,
typename... _Flags>
217 [[__gnu__::__always_inline__]]
218 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
219 partial_load(_It __first, iter_difference_t<_It> __n,
220 const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
221 flags<_Flags...> __f = {})
222 {
return partial_load<_Vp>(span<
const iter_value_t<_It>>(__first, __n), __mask, __f); }
224 template <
typename _Vp =
void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
226 [[__gnu__::__always_inline__]]
227 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
228 partial_load(_It __first, _Sp __last, flags<_Flags...> __f = {})
229 {
return partial_load<_Vp>(span<
const iter_value_t<_It>>(__first, __last), __f); }
231 template <
typename _Vp =
void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
233 [[__gnu__::__always_inline__]]
234 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
235 partial_load(_It __first, _Sp __last,
const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
236 flags<_Flags...> __f = {})
237 {
return partial_load<_Vp>(span<
const iter_value_t<_It>>(__first, __last), __mask, __f); }
// Stores the elements of __v into the sized contiguous range __r.
//
// The range's iterator must be indirectly writable with _Tp; __f carries the
// load/store flags. Unless __f contains __allow_partial_loadstore, the range
// must hold at least _TV::size() elements (static_assert for statically sized
// ranges, __glibcxx_simd_precondition otherwise).
//
// NOTE(review): the return type line, the function's braces and the keywords
// that separate the element-wise loop from the pointer-based store are not
// present in this text -- compare with the upstream header before changing
// any logic here.
239 template <
typename _Tp,
typename _Ap, __sized_contiguous_range _Rg,
typename... _Flags>
240 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
241 [[__gnu__::__always_inline__]]
243 unchecked_store(
const basic_vec<_Tp, _Ap>& __v, _Rg&& __r, flags<_Flags...> __f = {})
245 using _TV = basic_vec<_Tp, _Ap>;
246 static_assert(destructible<_TV>);
// Conversions that are not value-preserving must be requested via a flag.
247 static_assert(__loadstore_convertible_to<_Tp, ranges::range_value_t<_Rg>, _Flags...>,
248 "'flag_convert' must be used for conversions that are not value-preserving");
// When set, the size checks below are skipped.
250 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
251 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
252 static_assert(ranges::size(__r) >= _TV::size(),
"given range must have sufficient size");
// Data pointer of the range after the flags' pointer adjustment
// (presumably alignment related -- confirm in _S_adjust_pointer).
254 auto* __ptr = __f.template _S_adjust_pointer<_TV>(ranges::data(__r));
255 const auto __rg_size = ranges::size(__r);
256 if constexpr (!__allow_out_of_bounds)
257 __glibcxx_simd_precondition(
258 ranges::size(__r) >= _TV::size(),
259 "output range is too small. Did you mean to use 'partial_store'?");
// Element-wise path: writes converted elements one at a time and stops at
// whichever is smaller, the range size or the vec size.
263 for (
unsigned __i = 0; __i < __rg_size && __i < _TV::size(); ++__i)
264 __ptr[__i] =
static_cast<ranges::range_value_t<_Rg>
>(__v[__i]);
// Pointer path.
// NOTE(review): the statement selected by this `if constexpr` and the `else`
// that presumably guards the _S_partial_store call are not visible; do not
// read the two lines below as a single if-statement.
268 if constexpr (!__allow_out_of_bounds)
271 _TV::_S_partial_store(__v, __ptr, __rg_size);
// Masked variant of the range store: stores elements of __v into the sized
// contiguous range __r under control of __mask.
//
// The range's iterator must be indirectly writable with _Tp; __f carries the
// load/store flags. Unless __f contains __allow_partial_loadstore, the range
// must hold at least _TV::size() elements (static_assert for statically sized
// ranges, __glibcxx_simd_precondition otherwise).
//
// NOTE(review): the return type line, the function's braces and the keywords
// that separate the element-wise loop from the pointer-based store are not
// present in this text -- compare with the upstream header before changing
// any logic here.
275 template <
typename _Tp,
typename _Ap, __sized_contiguous_range _Rg,
typename... _Flags>
276 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
277 [[__gnu__::__always_inline__]]
279 unchecked_store(
const basic_vec<_Tp, _Ap>& __v, _Rg&& __r,
280 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
281 flags<_Flags...> __f = {})
283 using _TV = basic_vec<_Tp, _Ap>;
// Conversions that are not value-preserving must be requested via a flag.
284 static_assert(__loadstore_convertible_to<_Tp, ranges::range_value_t<_Rg>, _Flags...>,
285 "'flag_convert' must be used for conversions that are not value-preserving");
// When set, the size checks below are skipped.
287 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
288 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
289 static_assert(ranges::size(__r) >= _TV::size(),
"given range must have sufficient size");
// Data pointer of the range after the flags' pointer adjustment
// (presumably alignment related -- confirm in _S_adjust_pointer).
291 auto* __ptr = __f.template _S_adjust_pointer<_TV>(ranges::data(__r));
293 if constexpr (!__allow_out_of_bounds)
294 __glibcxx_simd_precondition(
295 ranges::size(__r) >=
size_t(_TV::size()),
296 "output range is too small. Did you mean to use 'partial_store'?");
298 const size_t __rg_size = ranges::size(__r);
// Element-wise path: an element is written only if its mask element is true
// and, when partial stores are allowed, its index lies inside the range.
301 for (
int __i = 0; __i < _TV::size(); ++__i)
303 if (__mask[__i] && (!__allow_out_of_bounds ||
size_t(__i) < __rg_size))
304 __ptr[__i] =
static_cast<ranges::range_value_t<_Rg>
>(__v[__i]);
// Pointer path: for a range shorter than the vec the mask is combined with
// _S_partial_mask_of_n(__rg_size) before the masked store.
// NOTE(review): the `else` that presumably guards the final plain masked
// store is not visible.
309 if (__allow_out_of_bounds && __rg_size <
size_t(_TV::size()))
310 _TV::_S_masked_store(__v, __ptr,
311 __mask && _TV::mask_type::_S_partial_mask_of_n(
int(__rg_size)));
313 _TV::_S_masked_store(__v, __ptr, __mask);
317 template <
typename _Tp,
typename _Ap, contiguous_iterator _It,
typename... _Flags>
318 requires indirectly_writable<_It, _Tp>
319 [[__gnu__::__always_inline__]]
321 unchecked_store(
const basic_vec<_Tp, _Ap>& __v, _It __first,
322 iter_difference_t<_It> __n, flags<_Flags...> __f = {})
323 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __n), __f); }
325 template <
typename _Tp,
typename _Ap, contiguous_iterator _It,
typename... _Flags>
326 requires indirectly_writable<_It, _Tp>
327 [[__gnu__::__always_inline__]]
329 unchecked_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, iter_difference_t<_It> __n,
330 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
331 flags<_Flags...> __f = {})
332 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __n), __mask, __f); }
334 template <
typename _Tp,
typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
336 requires indirectly_writable<_It, _Tp>
337 [[__gnu__::__always_inline__]]
339 unchecked_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
340 flags<_Flags...> __f = {})
341 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __last), __f); }
343 template <
typename _Tp,
typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
345 requires indirectly_writable<_It, _Tp>
346 [[__gnu__::__always_inline__]]
348 unchecked_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
349 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
350 flags<_Flags...> __f = {})
351 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __last), __mask, __f); }
353 template <
typename _Tp,
typename _Ap, __sized_contiguous_range _Rg,
typename... _Flags>
354 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
355 [[__gnu__::__always_inline__]]
357 partial_store(
const basic_vec<_Tp, _Ap>& __v, _Rg&& __r, flags<_Flags...> __f = {})
358 { simd::unchecked_store(__v, __r, __f | __allow_partial_loadstore); }
360 template <
typename _Tp,
typename _Ap, __sized_contiguous_range _Rg,
typename... _Flags>
361 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
362 [[__gnu__::__always_inline__]]
364 partial_store(
const basic_vec<_Tp, _Ap>& __v, _Rg&& __r,
365 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
366 flags<_Flags...> __f = {})
367 { simd::unchecked_store(__v, __r, __mask, __f | __allow_partial_loadstore); }
369 template <
typename _Tp,
typename _Ap, contiguous_iterator _It,
typename... _Flags>
370 requires indirectly_writable<_It, _Tp>
371 [[__gnu__::__always_inline__]]
373 partial_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, iter_difference_t<_It> __n,
374 flags<_Flags...> __f = {})
375 { partial_store(__v, span(__first, __n), __f); }
377 template <
typename _Tp,
typename _Ap, contiguous_iterator _It,
typename... _Flags>
378 requires indirectly_writable<_It, _Tp>
379 [[__gnu__::__always_inline__]]
381 partial_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, iter_difference_t<_It> __n,
382 const typename basic_vec<_Tp, _Ap>::mask_type& __mask, flags<_Flags...> __f = {})
383 { partial_store(__v, span(__first, __n), __mask, __f); }
385 template <
typename _Tp,
typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
387 requires indirectly_writable<_It, _Tp>
388 [[__gnu__::__always_inline__]]
390 partial_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
391 flags<_Flags...> __f = {})
392 { partial_store(__v, span(__first, __last), __f); }
394 template <
typename _Tp,
typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
396 requires indirectly_writable<_It, _Tp>
397 [[__gnu__::__always_inline__]]
399 partial_store(
const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
400 const typename basic_vec<_Tp, _Ap>::mask_type& __mask, flags<_Flags...> __f = {})
401 { partial_store(__v, span(__first, __last), __mask, __f); }
403_GLIBCXX_END_NAMESPACE_VERSION
406#pragma GCC diagnostic pop
// Namespace std is the top-level namespace for ISO C++ entities.