libstdc++
simd_loadstore.h
1// Implementation of <simd> -*- C++ -*-
2
3// Copyright The GNU Toolchain Authors.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_SIMD_LOADSTORE_H
26#define _GLIBCXX_SIMD_LOADSTORE_H 1
27
28#ifdef _GLIBCXX_SYSHDR
29#pragma GCC system_header
30#endif
31
32#if __cplusplus >= 202400L
33
34#include "simd_vec.h"
35
36// psabi warnings are bogus because the ABI of the internal types never leaks into user code
37#pragma GCC diagnostic push
38#pragma GCC diagnostic ignored "-Wpsabi"
39
// [simd.loadstore] ----------------------------------------------------------
41namespace std _GLIBCXX_VISIBILITY(default)
42{
43_GLIBCXX_BEGIN_NAMESPACE_VERSION
44namespace simd
45{
  // Maps the user-requested destination type _Vp of a load to the vector
  // type the load functions actually return.  A non-void _Vp is used as-is.
  template <typename _Vp, typename _Tp>
    struct __vec_load_return
    { using type = _Vp; };

  // When no destination type was requested (_Vp = void), load into a
  // basic_vec of the range's value type _Tp.
  template <typename _Tp>
    struct __vec_load_return<void, _Tp>
    { using type = basic_vec<_Tp>; };

  // Convenience alias for __vec_load_return<_Vp, _Tp>::type.
  template <typename _Vp, typename _Tp>
    using __vec_load_return_t = typename __vec_load_return<_Vp, _Tp>::type;

  // The mask type matching the vector type a load with these template
  // arguments returns; used for the masked load overloads below.
  template <typename _Vp, typename _Tp>
    using __load_mask_type_t = typename __vec_load_return_t<_Vp, _Tp>::mask_type;

  // A contiguous range whose size can be queried in O(1)
  // (i.e. ranges::data and ranges::size are both usable).
  template <typename _Tp>
    concept __sized_contiguous_range
      = ranges::contiguous_range<_Tp> && ranges::sized_range<_Tp>;
63
64 template <typename _Vp = void, __sized_contiguous_range _Rg, typename... _Flags>
65 [[__gnu__::__always_inline__]]
66 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
67 unchecked_load(_Rg&& __r, flags<_Flags...> __f = {})
68 {
69 using _Tp = ranges::range_value_t<_Rg>;
70 using _RV = __vec_load_return_t<_Vp, _Tp>;
71 using _Rp = typename _RV::value_type;
72 static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>, _Rp, _Flags...>,
73 "'flag_convert' must be used for conversions that are not value-preserving");
74
75 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
76 constexpr size_t __static_size = __static_range_size(__r);
77
78 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
79 static_assert(ranges::size(__r) >= _RV::size(), "given range must have sufficient size");
80
81 const auto* __ptr = __f.template _S_adjust_pointer<_RV>(ranges::data(__r));
82 const auto __rg_size = std::ranges::size(__r);
83 if constexpr (!__allow_out_of_bounds)
84 __glibcxx_simd_precondition(
85 std::ranges::size(__r) >= _RV::size(),
86 "Input range is too small. Did you mean to use 'partial_load'?");
87
88 if consteval
89 {
90 return _RV([&](size_t __i) -> _Rp {
91 if (__i >= __rg_size)
92 return _Rp();
93 else
94 return static_cast<_Rp>(__r[__i]);
95 });
96 }
97 else
98 {
99 if constexpr ((__static_size != dynamic_extent && __static_size >= size_t(_RV::size()))
100 || !__allow_out_of_bounds)
101 return _RV(_LoadCtorTag(), __ptr);
102 else
103 return _RV::_S_partial_load(__ptr, __rg_size);
104 }
105 }
106
107 template <typename _Vp = void, __sized_contiguous_range _Rg, typename... _Flags>
108 [[__gnu__::__always_inline__]]
109 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
110 unchecked_load(_Rg&& __r, const __load_mask_type_t<_Vp, ranges::range_value_t<_Rg>>& __mask,
111 flags<_Flags...> __f = {})
112 {
113 using _Tp = ranges::range_value_t<_Rg>;
114 using _RV = __vec_load_return_t<_Vp, _Tp>;
115 using _Rp = typename _RV::value_type;
116 static_assert(__vectorizable<_Tp>);
117 static_assert(__explicitly_convertible_to<_Tp, _Rp>);
118 static_assert(__loadstore_convertible_to<_Tp, _Rp, _Flags...>,
119 "'flag_convert' must be used for conversions that are not value-preserving");
120
121 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
122 constexpr auto __static_size = __static_range_size(__r);
123
124 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
125 static_assert(ranges::size(__r) >= _RV::size(), "given range must have sufficient size");
126
127 const auto* __ptr = __f.template _S_adjust_pointer<_RV>(ranges::data(__r));
128
129 if constexpr (!__allow_out_of_bounds)
130 __glibcxx_simd_precondition(
131 ranges::size(__r) >= size_t(_RV::size()),
132 "Input range is too small. Did you mean to use 'partial_load'?");
133
134 const size_t __rg_size = ranges::size(__r);
135 if consteval
136 {
137 return _RV([&](size_t __i) -> _Rp {
138 if (__i >= __rg_size || !__mask[int(__i)])
139 return _Rp();
140 else
141 return static_cast<_Rp>(__r[__i]);
142 });
143 }
144 else
145 {
146 constexpr bool __no_size_check
147 = !__allow_out_of_bounds
148 || (__static_size != dynamic_extent
149 && __static_size >= size_t(_RV::size.value));
150 if constexpr (_RV::size() == 1)
151 return __mask[0] && (__no_size_check || __rg_size > 0) ? _RV(_LoadCtorTag(), __ptr)
152 : _RV();
153 else if constexpr (__no_size_check)
154 return _RV::_S_masked_load(__ptr, __mask);
155 else if (__rg_size >= size_t(_RV::size()))
156 return _RV::_S_masked_load(__ptr, __mask);
157 else if (__rg_size > 0)
158 return _RV::_S_masked_load(
159 __ptr, __mask && _RV::mask_type::_S_partial_mask_of_n(int(__rg_size)));
160 else
161 return _RV();
162 }
163 }
164
165 template <typename _Vp = void, contiguous_iterator _It, typename... _Flags>
166 [[__gnu__::__always_inline__]]
167 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
168 unchecked_load(_It __first, iter_difference_t<_It> __n, flags<_Flags...> __f = {})
169 { return simd::unchecked_load<_Vp>(span<const iter_value_t<_It>>(__first, __n), __f); }
170
171 template <typename _Vp = void, contiguous_iterator _It, typename... _Flags>
172 [[__gnu__::__always_inline__]]
173 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
174 unchecked_load(_It __first, iter_difference_t<_It> __n,
175 const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
176 flags<_Flags...> __f = {})
177 { return simd::unchecked_load<_Vp>(span<const iter_value_t<_It>>(__first, __n), __mask, __f); }
178
179 template <typename _Vp = void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
180 typename... _Flags>
181 [[__gnu__::__always_inline__]]
182 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
183 unchecked_load(_It __first, _Sp __last, flags<_Flags...> __f = {})
184 { return simd::unchecked_load<_Vp>(span<const iter_value_t<_It>>(__first, __last), __f); }
185
186 template <typename _Vp = void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
187 typename... _Flags>
188 [[__gnu__::__always_inline__]]
189 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
190 unchecked_load(_It __first, _Sp __last,
191 const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
192 flags<_Flags...> __f = {})
193 {
194 return simd::unchecked_load<_Vp>(span<const iter_value_t<_It>>(__first, __last), __mask, __f);
195 }
196
197 template <typename _Vp = void, __sized_contiguous_range _Rg, typename... _Flags>
198 [[__gnu__::__always_inline__]]
199 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
200 partial_load(_Rg&& __r, flags<_Flags...> __f = {})
201 { return simd::unchecked_load<_Vp>(__r, __f | __allow_partial_loadstore); }
202
203 template <typename _Vp = void, __sized_contiguous_range _Rg, typename... _Flags>
204 [[__gnu__::__always_inline__]]
205 constexpr __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>
206 partial_load(_Rg&& __r, const __load_mask_type_t<_Vp, ranges::range_value_t<_Rg>>& __mask,
207 flags<_Flags...> __f = {})
208 { return simd::unchecked_load<_Vp>(__r, __mask, __f | __allow_partial_loadstore); }
209
210 template <typename _Vp = void, contiguous_iterator _It, typename... _Flags>
211 [[__gnu__::__always_inline__]]
212 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
213 partial_load(_It __first, iter_difference_t<_It> __n, flags<_Flags...> __f = {})
214 { return partial_load<_Vp>(span<const iter_value_t<_It>>(__first, __n), __f); }
215
216 template <typename _Vp = void, contiguous_iterator _It, typename... _Flags>
217 [[__gnu__::__always_inline__]]
218 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
219 partial_load(_It __first, iter_difference_t<_It> __n,
220 const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
221 flags<_Flags...> __f = {})
222 { return partial_load<_Vp>(span<const iter_value_t<_It>>(__first, __n), __mask, __f); }
223
224 template <typename _Vp = void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
225 typename... _Flags>
226 [[__gnu__::__always_inline__]]
227 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
228 partial_load(_It __first, _Sp __last, flags<_Flags...> __f = {})
229 { return partial_load<_Vp>(span<const iter_value_t<_It>>(__first, __last), __f); }
230
231 template <typename _Vp = void, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
232 typename... _Flags>
233 [[__gnu__::__always_inline__]]
234 constexpr __vec_load_return_t<_Vp, iter_value_t<_It>>
235 partial_load(_It __first, _Sp __last, const __load_mask_type_t<_Vp, iter_value_t<_It>>& __mask,
236 flags<_Flags...> __f = {})
237 { return partial_load<_Vp>(span<const iter_value_t<_It>>(__first, __last), __mask, __f); }
238
239 template <typename _Tp, typename _Ap, __sized_contiguous_range _Rg, typename... _Flags>
240 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
241 [[__gnu__::__always_inline__]]
242 constexpr void
243 unchecked_store(const basic_vec<_Tp, _Ap>& __v, _Rg&& __r, flags<_Flags...> __f = {})
244 {
245 using _TV = basic_vec<_Tp, _Ap>;
246 static_assert(destructible<_TV>);
247 static_assert(__loadstore_convertible_to<_Tp, ranges::range_value_t<_Rg>, _Flags...>,
248 "'flag_convert' must be used for conversions that are not value-preserving");
249
250 constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
251 if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
252 static_assert(ranges::size(__r) >= _TV::size(), "given range must have sufficient size");
253
254 auto* __ptr = __f.template _S_adjust_pointer<_TV>(ranges::data(__r));
255 const auto __rg_size = ranges::size(__r);
256 if constexpr (!__allow_out_of_bounds)
257 __glibcxx_simd_precondition(
258 ranges::size(__r) >= _TV::size(),
259 "output range is too small. Did you mean to use 'partial_store'?");
260
261 if consteval
262 {
263 for (unsigned __i = 0; __i < __rg_size && __i < _TV::size(); ++__i)
264 __ptr[__i] = static_cast<ranges::range_value_t<_Rg>>(__v[__i]);
265 }
266 else
267 {
268 if constexpr (!__allow_out_of_bounds)
269 __v._M_store(__ptr);
270 else
271 _TV::_S_partial_store(__v, __ptr, __rg_size);
272 }
273 }
274
  // Masked store: only elements of __v whose mask bit is true are written to
  // the corresponding range elements.  Unless the __allow_partial_loadstore
  // flag is set, the range must provide at least _TV::size() elements; with
  // the flag set, mask bits beyond the end of the range are ignored.
  template <typename _Tp, typename _Ap, __sized_contiguous_range _Rg, typename... _Flags>
    requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
    [[__gnu__::__always_inline__]]
    constexpr void
    unchecked_store(const basic_vec<_Tp, _Ap>& __v, _Rg&& __r,
                    const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
                    flags<_Flags...> __f = {})
    {
      using _TV = basic_vec<_Tp, _Ap>;
      static_assert(__loadstore_convertible_to<_Tp, ranges::range_value_t<_Rg>, _Flags...>,
                    "'flag_convert' must be used for conversions that are not value-preserving");

      constexpr bool __allow_out_of_bounds = __f._S_test(__allow_partial_loadstore);
      // Compile-time size check where the range size is a constant expression.
      if constexpr (!__allow_out_of_bounds && __static_sized_range<_Rg>)
        static_assert(ranges::size(__r) >= _TV::size(), "given range must have sufficient size");

      auto* __ptr = __f.template _S_adjust_pointer<_TV>(ranges::data(__r));

      if constexpr (!__allow_out_of_bounds)
        __glibcxx_simd_precondition(
          ranges::size(__r) >= size_t(_TV::size()),
          "output range is too small. Did you mean to use 'partial_store'?");

      const size_t __rg_size = ranges::size(__r);
      if consteval
        {
          // Constant evaluation: element-wise store, skipping masked-off and
          // (for partial stores) out-of-bounds elements.
          for (int __i = 0; __i < _TV::size(); ++__i)
            {
              if (__mask[__i] && (!__allow_out_of_bounds || size_t(__i) < __rg_size))
                __ptr[__i] = static_cast<ranges::range_value_t<_Rg>>(__v[__i]);
            }
        }
      else
        {
          // For a partial store into a too-small range, additionally mask off
          // the elements beyond the end of the range.
          if (__allow_out_of_bounds && __rg_size < size_t(_TV::size()))
            _TV::_S_masked_store(__v, __ptr,
                                 __mask && _TV::mask_type::_S_partial_mask_of_n(int(__rg_size)));
          else
            _TV::_S_masked_store(__v, __ptr, __mask);
        }
    }
316
317 template <typename _Tp, typename _Ap, contiguous_iterator _It, typename... _Flags>
318 requires indirectly_writable<_It, _Tp>
319 [[__gnu__::__always_inline__]]
320 constexpr void
321 unchecked_store(const basic_vec<_Tp, _Ap>& __v, _It __first,
322 iter_difference_t<_It> __n, flags<_Flags...> __f = {})
323 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __n), __f); }
324
325 template <typename _Tp, typename _Ap, contiguous_iterator _It, typename... _Flags>
326 requires indirectly_writable<_It, _Tp>
327 [[__gnu__::__always_inline__]]
328 constexpr void
329 unchecked_store(const basic_vec<_Tp, _Ap>& __v, _It __first, iter_difference_t<_It> __n,
330 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
331 flags<_Flags...> __f = {})
332 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __n), __mask, __f); }
333
334 template <typename _Tp, typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
335 typename... _Flags>
336 requires indirectly_writable<_It, _Tp>
337 [[__gnu__::__always_inline__]]
338 constexpr void
339 unchecked_store(const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
340 flags<_Flags...> __f = {})
341 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __last), __f); }
342
343 template <typename _Tp, typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
344 typename... _Flags>
345 requires indirectly_writable<_It, _Tp>
346 [[__gnu__::__always_inline__]]
347 constexpr void
348 unchecked_store(const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
349 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
350 flags<_Flags...> __f = {})
351 { simd::unchecked_store(__v, std::span<iter_value_t<_It>>(__first, __last), __mask, __f); }
352
353 template <typename _Tp, typename _Ap, __sized_contiguous_range _Rg, typename... _Flags>
354 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
355 [[__gnu__::__always_inline__]]
356 constexpr void
357 partial_store(const basic_vec<_Tp, _Ap>& __v, _Rg&& __r, flags<_Flags...> __f = {})
358 { simd::unchecked_store(__v, __r, __f | __allow_partial_loadstore); }
359
360 template <typename _Tp, typename _Ap, __sized_contiguous_range _Rg, typename... _Flags>
361 requires indirectly_writable<ranges::iterator_t<_Rg>, _Tp>
362 [[__gnu__::__always_inline__]]
363 constexpr void
364 partial_store(const basic_vec<_Tp, _Ap>& __v, _Rg&& __r,
365 const typename basic_vec<_Tp, _Ap>::mask_type& __mask,
366 flags<_Flags...> __f = {})
367 { simd::unchecked_store(__v, __r, __mask, __f | __allow_partial_loadstore); }
368
369 template <typename _Tp, typename _Ap, contiguous_iterator _It, typename... _Flags>
370 requires indirectly_writable<_It, _Tp>
371 [[__gnu__::__always_inline__]]
372 constexpr void
373 partial_store(const basic_vec<_Tp, _Ap>& __v, _It __first, iter_difference_t<_It> __n,
374 flags<_Flags...> __f = {})
375 { partial_store(__v, span(__first, __n), __f); }
376
377 template <typename _Tp, typename _Ap, contiguous_iterator _It, typename... _Flags>
378 requires indirectly_writable<_It, _Tp>
379 [[__gnu__::__always_inline__]]
380 constexpr void
381 partial_store(const basic_vec<_Tp, _Ap>& __v, _It __first, iter_difference_t<_It> __n,
382 const typename basic_vec<_Tp, _Ap>::mask_type& __mask, flags<_Flags...> __f = {})
383 { partial_store(__v, span(__first, __n), __mask, __f); }
384
385 template <typename _Tp, typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
386 typename... _Flags>
387 requires indirectly_writable<_It, _Tp>
388 [[__gnu__::__always_inline__]]
389 constexpr void
390 partial_store(const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
391 flags<_Flags...> __f = {})
392 { partial_store(__v, span(__first, __last), __f); }
393
394 template <typename _Tp, typename _Ap, contiguous_iterator _It, sized_sentinel_for<_It> _Sp,
395 typename... _Flags>
396 requires indirectly_writable<_It, _Tp>
397 [[__gnu__::__always_inline__]]
398 constexpr void
399 partial_store(const basic_vec<_Tp, _Ap>& __v, _It __first, _Sp __last,
400 const typename basic_vec<_Tp, _Ap>::mask_type& __mask, flags<_Flags...> __f = {})
401 { partial_store(__v, span(__first, __last), __mask, __f); }
402} // namespace simd
403_GLIBCXX_END_NAMESPACE_VERSION
404} // namespace std
405
406#pragma GCC diagnostic pop
407#endif // C++26
408#endif // _GLIBCXX_SIMD_LOADSTORE_H
ISO C++ entities are declared in the top-level namespace std.