libstdc++
simd_mask.h
1// Implementation of <simd> -*- C++ -*-
2
3// Copyright The GNU Toolchain Authors.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_SIMD_MASK_H
26#define _GLIBCXX_SIMD_MASK_H 1
27
28#ifdef _GLIBCXX_SYSHDR
29#pragma GCC system_header
30#endif
31
32#if __cplusplus >= 202400L
33
34#include "simd_iterator.h"
35#include "vec_ops.h"
36#if _GLIBCXX_X86
37#include "simd_x86.h"
38#endif
39
40#include <bit>
41#include <bitset>
42
43// psabi warnings are bogus because the ABI of the internal types never leaks into user code
44#pragma GCC diagnostic push
45#pragma GCC diagnostic ignored "-Wpsabi"
46
47namespace std _GLIBCXX_VISIBILITY(default)
48{
49_GLIBCXX_BEGIN_NAMESPACE_VERSION
50namespace simd
51{
52 template <unsigned _Np>
53 struct _SwapNeighbors
54 {
55 consteval unsigned
56 operator()(unsigned __i, unsigned __size) const
57 {
58 if (__size % (2 * _Np) != 0)
59 __builtin_abort(); // swap_neighbors<N> permutation requires a multiple of 2N elements
60 else if (std::has_single_bit(_Np))
61 return __i ^ _Np;
62 else if (__i % (2 * _Np) >= _Np)
63 return __i - _Np;
64 else
65 return __i + _Np;
66 }
67 };
68
  /** @internal
   * Split the bitset @p __b into bits [0, _Np) and [_Np, _Mp), returned as
   * the _M_lo and _M_hi members of an unnamed aggregate.
   */
  template <size_t _Np, size_t _Mp>
  constexpr auto
  __bitset_split(const bitset<_Mp>& __b)
  {
    constexpr auto __bits_per_word = __CHAR_BIT__ * __SIZEOF_LONG__;
    if constexpr (_Np % __bits_per_word == 0)
      {
        // Word-aligned split point: the storage of bitset<_Mp> lines up with
        // the two smaller bitsets, so a bit_cast of the whole object works.
        struct _Tmp
        {
          bitset<_Np> _M_lo;
          bitset<_Mp - _Np> _M_hi;
        };
        return __builtin_bit_cast(_Tmp, __b);
      }
    else
      {
        // Unaligned split: go through an unsigned long long and shift, which
        // limits this branch to _Mp <= 64 bits.
        constexpr auto __bits_per_ullong = __CHAR_BIT__ * __SIZEOF_LONG_LONG__;
        static_assert(_Mp <= __bits_per_ullong);
        using _Lo = _Bitmask<_Np>;
        using _Hi = _Bitmask<_Mp - _Np>;
        struct _Tmp
        {
          _Lo _M_lo;
          _Hi _M_hi;
        };
        return _Tmp {static_cast<_Lo>(__b.to_ullong()), static_cast<_Hi>(__b.to_ullong() >> _Np)};
      }
  }
97
  // Compile-time sanity checks for the word-aligned (bit_cast) branch.
  static_assert(__bitset_split<64>(bitset<128>(1))._M_lo == bitset<64>(1));
  static_assert(__bitset_split<64>(bitset<128>(1))._M_hi == bitset<64>(0));
100
  // [simd.traits]
  // --- rebind ---
  // Primary template: provides no member `type` for unsupported combinations,
  // keeping rebind SFINAE-friendly.
  template <typename _Tp, typename _Vp, _ArchTraits _Traits = {}>
  struct rebind
  {};

  /**
   * Computes a member @c type `basic_vec<_Tp, Abi>`, where @c Abi is chosen such that the
   * number of elements is equal to `_Vp::size()` and features of the ABI tag (such as the
   * internal representation of masks, or storage order of complex components) are preserved.
   */
  template <__vectorizable _Tp, __simd_vec_type _Vp, _ArchTraits _Traits>
  //requires requires { typename __deduce_abi_t<_Tp, _Vp::size()>; }
  struct rebind<_Tp, _Vp, _Traits>
  { using type = __similar_vec<_Tp, _Vp::size(), typename _Vp::abi_type>; };

  /**
   * As above, except for @c basic_mask.
   */
  template <__vectorizable _Tp, __simd_mask_type _Mp, _ArchTraits _Traits>
  //requires requires { typename __deduce_abi_t<_Tp, _Mp::size()>; }
  struct rebind<_Tp, _Mp, _Traits>
  { using type = __similar_mask<_Tp, _Mp::size(), typename _Mp::abi_type>; };

  // Convenience alias for rebind<...>::type.
  template <typename _Tp, typename _Vp>
  using rebind_t = typename rebind<_Tp, _Vp>::type;
127
  // --- resize ---
  // Primary template: provides no member `type` for unsupported combinations.
  template <__simd_size_type _Np, typename _Vp, _ArchTraits _Traits = {}>
  struct resize
  {};

  // Computes a member `type`: a vec with _Vp's value_type but _Np elements,
  // preserving ABI features (see __similar_vec).
  template <__simd_size_type _Np, __simd_vec_type _Vp, _ArchTraits _Traits>
  requires (_Np >= 1)
  //requires requires { typename __deduce_abi_t<typename _Vp::value_type, _Np>; }
  struct resize<_Np, _Vp, _Traits>
  { using type = __similar_vec<typename _Vp::value_type, _Np, typename _Vp::abi_type>; };

  // As above, for basic_mask: the mask's ABI tag is rebound to _Np elements.
  template <__simd_size_type _Np, __simd_mask_type _Mp, _ArchTraits _Traits>
  requires (_Np >= 1)
  //requires requires { typename __deduce_abi_t<typename _Mp::value_type, _Np>; }
  struct resize<_Np, _Mp, _Traits>
  {
    using _A1 = decltype(__abi_rebind<__mask_element_size<_Mp>, _Np, typename _Mp::abi_type,
                                      true>());

    static_assert(__abi_tag<_A1>);

    // The rebound ABI must keep the same variant unless either side is scalar.
    static_assert(_Mp::abi_type::_S_variant == _A1::_S_variant || __scalar_abi_tag<_A1>
                    || __scalar_abi_tag<typename _Mp::abi_type>);

    using type = basic_mask<__mask_element_size<_Mp>, _A1>;
  };

  // Convenience alias for resize<...>::type.
  template <__simd_size_type _Np, typename _Vp>
  using resize_t = typename resize<_Np, _Vp>::type;
157
  // [simd.syn]
  // Special index values usable by permute index mappings; chosen far outside
  // the range of valid element indices (presumably selecting a zeroed or an
  // uninitialized result element — see [simd.permute.static]).
  inline constexpr __simd_size_type zero_element = numeric_limits<int>::min();

  // Must differ from zero_element; defined as the adjacent value.
  inline constexpr __simd_size_type uninit_element = zero_element + 1;
162
  // [simd.permute.static]
  // Permutation with a compile-time index mapping __idxmap. _Np selects the
  // result size; the default 0 keeps the source size.
  template<__simd_size_type _Np = 0, __simd_vec_or_mask_type _Vp,
           __index_permutation_function<_Vp> _IdxMap>
  [[__gnu__::__always_inline__]]
  constexpr resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>
  permute(const _Vp& __v, _IdxMap&& __idxmap)
  { return resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>::_S_static_permute(__v, __idxmap); }

  // [simd.permute.dynamic]
  // Permutation with runtime indices; the result size matches the index
  // vector and the work is done by the subscript operator.
  template<__simd_vec_or_mask_type _Vp, __simd_integral _Ip>
  [[__gnu__::__always_inline__]]
  constexpr resize_t<_Ip::size(), _Vp>
  permute(const _Vp& __v, const _Ip& __indices)
  { return __v[__indices]; }
177
  // [simd.creation] ----------------------------------------------------------
  // Splits __x into parts of type _Vp; the actual work is done by _M_chunk
  // (array result if the sizes divide evenly, otherwise a tuple with a
  // smaller remainder part).
  template<__simd_vec_type _Vp, typename _Ap>
  [[__gnu__::__always_inline__]]
  constexpr auto
  chunk(const basic_vec<typename _Vp::value_type, _Ap>& __x) noexcept
  { return __x.template _M_chunk<_Vp>(); }

  // As above, for masks with matching element size.
  template<__simd_mask_type _Mp, typename _Ap>
  [[__gnu__::__always_inline__]]
  constexpr auto
  chunk(const basic_mask<__mask_element_size<_Mp>, _Ap>& __x) noexcept
  { return __x.template _M_chunk<_Mp>(); }

  // Convenience overloads: chunk by element count instead of by chunk type.
  template<__simd_size_type _Np, typename _Tp, typename _Ap>
  [[__gnu__::__always_inline__]]
  constexpr auto
  chunk(const basic_vec<_Tp, _Ap>& __x) noexcept
  -> decltype(chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x))
  { return chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x); }

  template<__simd_size_type _Np, size_t _Bytes, typename _Ap>
  [[__gnu__::__always_inline__]]
  constexpr auto
  chunk(const basic_mask<_Bytes, _Ap>& __x) noexcept
  -> decltype(chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x))
  { return chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x); }
204
  // Concatenates vecs of equal value_type into one vec whose element count is
  // the sum of the inputs' sizes.
  // LWG???? (reported 2025-11-25)
  template<typename _Tp, typename _A0, typename... _Abis>
  constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
  cat(const basic_vec<_Tp, _A0>& __x0, const basic_vec<_Tp, _Abis>&... __xs) noexcept
  {
    return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
             ::_S_concat(__x0, __xs...);
  }

  // As above, for masks of equal element size.
  // LWG???? (reported 2025-11-25)
  template<size_t _Bytes, typename _A0, typename... _Abis>
  constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
  cat(const basic_mask<_Bytes, _A0>& __x0, const basic_mask<_Bytes, _Abis>&... __xs) noexcept
  {
    return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
             ::_S_concat(__x0, __xs...);
  }
222
223 // implementation helper for chunk and cat
224 consteval int
225 __packs_to_skip_at_front(int __offset, initializer_list<int> __sizes)
226 {
227 int __i = 0;
228 int __n = 0;
229 for (int __s : __sizes)
230 {
231 __n += __s;
232 if (__n > __offset)
233 return __i;
234 ++__i;
235 }
236 __builtin_trap(); // called out of contract
237 }
238
239 consteval int
240 __packs_to_skip_at_back(int __offset, int __max, initializer_list<int> __sizes)
241 {
242 int __i = 0;
243 int __n = -__offset;
244 for (int __s : __sizes)
245 {
246 ++__i;
247 __n += __s;
248 if (__n >= __max)
249 return int(__sizes.size()) - __i;
250 }
251 return 0;
252 }
253
  // in principle, this overload allows conversions to _Dst - and it wouldn't be wrong - but the
  // general overload below is still a better candidate in overload resolution
  // Recursion terminator: the requested range starts at offset 0 and the
  // first argument already has the destination type.
  template <typename _Dst>
  [[__gnu__::__always_inline__]]
  constexpr _Dst
  __extract_simd_at(auto _Offset, const _Dst& __r, const auto&...)
    requires(_Offset.value == 0)
  { return __r; }

  // Recursion terminator: the requested range starts right after the first
  // argument and the second argument already has the destination type.
  template <typename _Dst, typename _V0>
  [[__gnu__::__always_inline__]]
  constexpr _Dst
  __extract_simd_at(auto _Offset, const _V0&, const _Dst& __r, const auto&...)
    requires(_Offset.value == _V0::size.value)
  { return __r; }
269
  /** @internal
   * Extract a _Dst (vec or mask) from the logical concatenation of the pack
   * @p __xs, starting at element index @p _Offset (a constant wrapper).
   *
   * Recursively shrinks the problem (splitting multi-register destinations,
   * dropping unneeded inputs, flattening multi-register inputs, unifying ABI
   * tags) and then dispatches to the cheapest strategy: bit shifts for
   * bitmasks, vector concat/shuffle/insert for vec data, or a memory
   * round-trip as the last resort.
   */
  template <typename _Dst, typename... _Vs>
  [[__gnu__::__always_inline__]]
  constexpr _Dst
  __extract_simd_at(auto _Offset, const _Vs&... __xs)
  {
    using _Adst = typename _Dst::abi_type;
    if constexpr (_Adst::_S_nreg >= 2)
      {
        // Multi-register destination: extract low and high halves separately.
        using _Dst0 = remove_cvref_t<decltype(declval<_Dst>()._M_get_low())>;
        using _Dst1 = remove_cvref_t<decltype(declval<_Dst>()._M_get_high())>;
        return _Dst::_S_init(__extract_simd_at<_Dst0>(_Offset, __xs...),
                             __extract_simd_at<_Dst1>(_Offset + _Dst0::size, __xs...));
      }
    else
      {
        using _Ret = remove_cvref_t<decltype(declval<_Dst>()._M_get())>;
        constexpr bool __use_bitmask = __simd_mask_type<_Dst> && _Adst::_S_is_bitmask;
        constexpr int __dst_full_size = __bit_ceil(unsigned(_Adst::_S_size));
        constexpr int __nargs = sizeof...(__xs);
        using _Afirst = typename _Vs...[0]::abi_type;
        using _Alast = typename _Vs...[__nargs - 1]::abi_type;
        const auto& __x0 = __xs...[0];
        const auto& __xlast = __xs...[__nargs - 1];
        constexpr int __ninputs = (_Vs::size.value + ...);
        if constexpr (_Offset.value >= _Afirst::_S_size
                        || __ninputs - _Offset.value - _Alast::_S_size >= _Adst::_S_size)
          { // can drop inputs at the front and/or back of the pack
            constexpr int __skip_front = __packs_to_skip_at_front(_Offset.value,
                                                                  {_Vs::size.value...});
            constexpr int __skip_back = __packs_to_skip_at_back(_Offset.value, _Adst::_S_size,
                                                                {_Vs::size.value...});
            static_assert(__skip_front > 0 || __skip_back > 0);
            constexpr auto [...__skip] = _IotaArray<__skip_front>;
            constexpr auto [...__is] = _IotaArray<__nargs - __skip_front - __skip_back>;
            // Adjust the offset by the sizes of the dropped leading packs.
            constexpr int __new_offset = _Offset.value - (0 + ... + _Vs...[__skip]::size.value);
            return __extract_simd_at<_Dst>(cw<__new_offset>, __xs...[__is + __skip_front]...);
          }
        else if constexpr (_Adst::_S_size == 1)
          { // trivial conversion to one value_type
            return _Dst(__x0[_Offset.value]);
          }
        else if constexpr (_Afirst::_S_nreg >= 2 || _Alast::_S_nreg >= 2)
          { // flatten first and/or last multi-register argument
            constexpr bool __flatten_first = _Afirst::_S_nreg >= 2;
            constexpr bool __flatten_last = __nargs > 1 && _Alast::_S_nreg >= 2;
            constexpr auto [...__is] = _IotaArray<__nargs - __flatten_first - __flatten_last>;
            if constexpr (__flatten_first && __flatten_last)
              return __extract_simd_at<_Dst>(
                       _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...,
                       __xlast._M_get_low(), __xlast._M_get_high());
            else if constexpr (__flatten_first)
              return __extract_simd_at<_Dst>(
                       _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...);
            else
              return __extract_simd_at<_Dst>(
                       _Offset, __xs...[__is]..., __xlast._M_get_low(), __xlast._M_get_high());
          }
        else if constexpr (__simd_mask_type<_Dst>
                             && ((_Adst::_S_variant != _Vs::abi_type::_S_variant
                                    && !__scalar_abi_tag<typename _Vs::abi_type>) || ...))
          { // convert ABI tag if incompatible
            return __extract_simd_at<_Dst>(
                     _Offset, static_cast<const resize_t<_Vs::size.value, _Dst>&>(__xs)...);
          }

        // at this point __xs should be as small as possible; there may be some corner cases left

        else if constexpr (__nargs == 1)
          { // simple and optimal
            if constexpr (__use_bitmask)
              return _Dst(_Ret(__x0._M_to_uint() >> _Offset.value));
            else
              return _VecOps<_Ret>::_S_extract(__x0._M_concat_data(false), _Offset);
          }
        else if constexpr (__use_bitmask)
          { // fairly simple and optimal bit shifting solution
            static_assert(_Afirst::_S_nreg == 1);
            static_assert(_Offset.value < _Afirst::_S_size);
            // __offset tracks where the current input's bit 0 lands in __r;
            // negative while the input still starts before _Offset.
            int __offset = -_Offset.value;
            _Ret __r;
            template for (const auto& __x : {__xs...})
              {
                if (__offset <= 0)
                  __r = _Ret(__x._M_to_uint() >> -__offset);
                else if (__offset < _Adst::_S_size)
                  __r |= _Ret(_Ret(__x._M_to_uint()) << __offset);
                __offset += __x.size.value;
              }
            return _Dst(__r);
          }
        else if constexpr (__nargs == 2 && _Offset == 0 && _Adst::_S_nreg == 1
                             && _Afirst::_S_size >= _Alast::_S_size
                             && __has_single_bit(unsigned(_Afirst::_S_size)))
          { // simple __vec_concat
            if constexpr (_Afirst::_S_size == 1)
              // even simpler init from two values
              return _Ret{__x0._M_concat_data()[0], __xlast._M_concat_data()[0]};
            else
              {
                const auto __v0 = __x0._M_concat_data();
                const auto __v1 = __vec_zero_pad_to<sizeof(__v0)>(__xlast._M_concat_data());
                return __vec_concat(__v0, __v1);
              }
          }
        else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
                             && _Afirst::_S_nreg == 1 && _Alast::_S_size == 1)
          { // optimize insertion of one element at the end
            _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_get());
            __vec_set(__r, _Afirst::_S_size, __xlast._M_concat_data()[0]);
            return __r;
          }
        else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
                             && _Afirst::_S_nreg == 1 && _Alast::_S_size == 2)
          { // optimize insertion of two elements at the end
            _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_concat_data());
            const auto __x1 = __xlast._M_concat_data();
            if constexpr (sizeof(__x1) <= sizeof(double) && (_Afirst::_S_size & 1) == 0)
              { // can use a single insert instruction
                // Reinterpret as double-width elements so both new elements
                // land with one __vec_set.
                using _Up = __conditional_t<
                              is_floating_point_v<__vec_value_type<_Ret>>,
                              __conditional_t<sizeof(__x1) == sizeof(double), double, float>,
                              __integer_from<sizeof(__x1)>>;
                auto __r2 = __vec_bit_cast<_Up>(__r);
                __vec_set(__r2, _Afirst::_S_size / 2, __vec_bit_cast<_Up>(__x1)[0]);
                __r = reinterpret_cast<_Ret>(__r2);
              }
            else
              {
                __vec_set(__r, _Afirst::_S_size, __x1[0]);
                __vec_set(__r, _Afirst::_S_size + 1, __x1[1]);
              }
            return __r;
          }
        else if constexpr (__nargs == 2 && _Afirst::_S_nreg == 1 && _Alast::_S_nreg == 1)
          { // optimize concat of two input vectors (e.g. using palignr)
            constexpr auto [...__is] = _IotaArray<__dst_full_size>;
            constexpr int __v2_offset = __width_of<decltype(__x0._M_concat_data())>;
            // Shuffle indices: first input's elements, then second input's
            // (offset into the second shuffle operand), -1 for don't-care.
            return __builtin_shufflevector(
                     __x0._M_concat_data(), __xlast._M_concat_data(), [](int __i) consteval {
                       if (__i < _Afirst::_S_size)
                         return __i;
                       __i -= _Afirst::_S_size;
                       if (__i < _Alast::_S_size)
                         return __i + __v2_offset;
                       else
                         return -1;
                     }(__is + _Offset.value)...);
          }
        // NOTE: runtime `if` (not constexpr) — both this branch and the final
        // else are instantiated; __is_const_known selects at run/compile time.
        else if (__is_const_known(__xs...) || __ninputs == _Adst::_S_size)
          { // hard to optimize for the compiler, but necessary in constant expressions
            return _VecOps<_Ret>::_S_extract(
                     __vec_concat_sized<__xs.size.value...>(__xs._M_concat_data(false)...),
                     _Offset);
          }
        else
          { // fallback to concatenation in memory => load the result
            alignas(_Ret) __vec_value_type<_Ret>
              __tmp[std::max(__ninputs, _Offset.value + __dst_full_size)] = {};
            int __offset = 0;
            template for (const auto& __x : {__xs...})
              {
                // Masks are stored via unary minus (-1/0 per element).
                if constexpr (__simd_mask_type<_Dst>)
                  (-__x)._M_store(__tmp + __offset);
                else
                  __x._M_store(__tmp + __offset);
                __offset += __x.size.value;
              }
            _Ret __r;
            __builtin_memcpy(&__r, __tmp + _Offset.value, sizeof(_Ret));
            return __r;
          }
      }
  }
443
  // [simd.mask] --------------------------------------------------------------
  // Primary template: matched only for invalid template-argument
  // combinations. All special members are deleted with a diagnostic message;
  // usable specializations are constrained on valid ABI tags below.
  template <size_t _Bytes, typename _Ap>
  class basic_mask
  {
  public:
    using value_type = bool;

    using abi_type = _Ap;

#define _GLIBCXX_DELETE_SIMD "This specialization is disabled because of an invalid combination " \
                             "of template arguments to basic_mask."

    basic_mask() = delete(_GLIBCXX_DELETE_SIMD);

    ~basic_mask() = delete(_GLIBCXX_DELETE_SIMD);

    basic_mask(const basic_mask&) = delete(_GLIBCXX_DELETE_SIMD);

    basic_mask& operator=(const basic_mask&) = delete(_GLIBCXX_DELETE_SIMD);

#undef _GLIBCXX_DELETE_SIMD
  };
466
  /** @internal
   * Base class of the basic_mask specializations: provides iterators, the
   * static size, and deleted mismatched conversions (with diagnostic
   * messages).
   */
  template <size_t _Bytes, typename _Ap>
  class _MaskBase
  {
    using _Mp = basic_mask<_Bytes, _Ap>;

  protected:
    // The vec type corresponding to this mask (element size and ABI).
    using _VecType = __simd_vec_from_mask_t<_Bytes, _Ap>;

    static_assert(destructible<_VecType> || _Bytes > sizeof(0ull));

  public:
    using iterator = __iterator<_Mp>;

    using const_iterator = __iterator<const _Mp>;

    constexpr iterator
    begin() noexcept
    { return {static_cast<_Mp&>(*this), 0}; }

    constexpr const_iterator
    begin() const noexcept
    { return cbegin(); }

    constexpr const_iterator
    cbegin() const noexcept
    { return {static_cast<const _Mp&>(*this), 0}; }

    // The iterator knows the size; end() is just a sentinel.
    constexpr default_sentinel_t
    end() const noexcept
    { return {}; }

    constexpr default_sentinel_t
    cend() const noexcept
    { return {}; }

    static constexpr auto size = __simd_size_c<_Ap::_S_size>;

    _MaskBase() = default;

    // Reject construction from a mask with a different element count.
    // LWG issue from 2026-03-04 / P4042R0
    template <size_t _UBytes, typename _UAbi>
    requires (_Ap::_S_size != _UAbi::_S_size)
    explicit
    _MaskBase(const basic_mask<_UBytes, _UAbi>&) = delete("size mismatch");

    // Reject construction from a vec; the message points to the intended way.
    template <typename _Up, typename _UAbi>
    explicit
    _MaskBase(const basic_vec<_Up, _UAbi>&)
    = delete("use operator! or a comparison to convert a vec into a mask");

    // Reject conversion to a vec with a different element count.
    template <typename _Up, typename _UAbi>
    requires (_Ap::_S_size != _UAbi::_S_size)
    operator basic_vec<_Up, _UAbi>() const
    = delete("size mismatch");
  };
522
523 template <size_t _Bytes, __abi_tag _Ap>
524 requires (_Ap::_S_nreg == 1)
525 class basic_mask<_Bytes, _Ap>
526 : public _MaskBase<_Bytes, _Ap>
527 {
528 using _Base = _MaskBase<_Bytes, _Ap>;
529
530 using _VecType = _Base::_VecType;
531
532 template <size_t, typename>
533 friend class basic_mask;
534
535 template <typename, typename>
536 friend class basic_vec;
537
    // Number of elements in the mask.
    static constexpr int _S_size = _Ap::_S_size;

    // Internal representation chosen by the ABI tag for this element size:
    // bool (scalar), an unsigned integer (bitmask), or a vector builtin.
    using _DataType = typename _Ap::template _MaskDataType<_Bytes>;

    static constexpr bool _S_has_bool_member = is_same_v<_DataType, bool>;

    static constexpr bool _S_is_scalar = _S_has_bool_member;

    static constexpr bool _S_use_bitmask = _Ap::_S_is_bitmask;

    // Capacity of the representation in elements (>= _S_size): rounded up to
    // a power of two, and to at least CHAR_BIT bits for bitmasks.
    static constexpr int _S_full_size = [] {
      if constexpr (_S_is_scalar)
        return _S_size;
      else if constexpr (_S_use_bitmask && _S_size < __CHAR_BIT__)
        return __CHAR_BIT__;
      else
        return __bit_ceil(unsigned(_S_size));
    }();

    // True if the representation has unused (padding) elements/bits.
    static constexpr bool _S_is_partial = _S_size != _S_full_size;

    // Representation value with all _S_size elements true and all padding
    // elements/bits false; used for broadcasting and sanitizing.
    static constexpr _DataType _S_implicit_mask = [] {
      if constexpr (_S_is_scalar)
        return true;
      else if (!_S_is_partial)
        return _DataType(~_DataType());
      else if constexpr (_S_use_bitmask)
        return _DataType((_DataType(1) << _S_size) - 1);
      else
        {
          constexpr auto [...__is] = _IotaArray<_S_full_size>;
          return _DataType{ (__is < _S_size ? -1 : 0)... };
        }
    }();

    // Actual padding bytes, not padding elements.
    // => _S_padding_bytes is 0 even if _S_is_partial is true.
    static constexpr size_t _S_padding_bytes = 0;

    // The single data member holding the mask state.
    _DataType _M_data;

  public:
    using value_type = bool;

    using abi_type = _Ap;

    using iterator = _Base::iterator;

    using const_iterator = _Base::const_iterator;
587
    // internal but public API ----------------------------------------------
    /// @internal Construct a mask directly from its internal representation.
    [[__gnu__::__always_inline__]]
    static constexpr basic_mask
    _S_init(_DataType __x)
    {
      basic_mask __r;
      __r._M_data = __x;
      return __r;
    }

    /// @internal Construct from mask bits in an unsigned integer; forwards to
    /// the uint constructor (one bit per element).
    [[__gnu__::__always_inline__]]
    static constexpr basic_mask
    _S_init(unsigned_integral auto __bits)
    { return basic_mask(__bits); }

    /// @internal Read-only access to the internal representation.
    [[__gnu__::__always_inline__]]
    constexpr const _DataType&
    _M_get() const
    { return _M_data; }
607
    /** @internal
     * Bit-cast the given object @p __x to basic_mask.
     *
     * This is necessary for _S_nreg > 1 where the last element can be bool or when the sizeof
     * doesn't match because of different alignment requirements of the sub-masks.
     */
    template <size_t _UBytes, typename _UAbi>
    [[__gnu__::__always_inline__]]
    static constexpr basic_mask
    _S_recursive_bit_cast(const basic_mask<_UBytes, _UAbi>& __x)
    { return __builtin_bit_cast(basic_mask, __x._M_concat_data()); }

    /** @internal
     * Return the mask's data as a single object (a 1-element vector builtin
     * for scalar masks, otherwise the internal representation). With
     * @p __do_sanitize, padding elements of a partial mask are cleared via
     * _S_implicit_mask first.
     */
    [[__gnu__::__always_inline__]]
    constexpr auto
    _M_concat_data(bool __do_sanitize = _S_is_partial) const
    {
      if constexpr (_S_is_scalar)
        return __vec_builtin_type<__integer_from<_Bytes>, 1>{__integer_from<_Bytes>(-_M_data)};
      else
        {
          if constexpr (_S_is_partial)
            if (__do_sanitize)
              return _DataType(_M_data & _S_implicit_mask);
          return _M_data;
        }
    }
634
    /** @internal
     * Returns a mask where the first @p __n elements are true. All remaining elements are false.
     *
     * @pre @p __n > 0 && @p __n < _S_size
     */
    template <_ArchTraits _Traits = {}>
    [[__gnu__::__always_inline__]]
    static constexpr basic_mask
    _S_partial_mask_of_n(int __n)
    {
      static_assert(!_S_is_scalar);
      if constexpr (!_S_use_bitmask)
        {
          // vec-mask: compare an iota vector against the broadcast __n.
          using _Ip = __integer_from<_Bytes>;
          __glibcxx_simd_precondition(__n >= 0 && __n <= numeric_limits<_Ip>::max(),
                                      "_S_partial_mask_of_n without _S_use_bitmask requires "
                                      "positive __n that does not overflow.");
          constexpr _DataType __0123
            = __builtin_bit_cast(_DataType, _IotaArray<_Ip(_S_full_size)>);
          return basic_mask(__0123 < _Ip(__n));
        }
      else
        {
          __glibcxx_simd_precondition(__n >= 0 && __n <= 255,
                                      "The x86 BZHI instruction requires __n to "
                                      "only use bits 0:7");
          // Prefer BZHI (zero high bits) when BMI2 is available.
          // NOTE(review): the `else if` in the second #if block pairs with the
          // `if` in the first — assumes both builtins exist together; confirm.
#if __has_builtin(__builtin_ia32_bzhi_si)
          if constexpr (_S_size <= 32 && _Traits._M_have_bmi2())
            return _S_init(_Bitmask<_S_size>(
                             __builtin_ia32_bzhi_si(~0u >> (32 - _S_size), unsigned(__n))));
#endif
#if __has_builtin(__builtin_ia32_bzhi_di)
          else if constexpr (_S_size <= 64 && _Traits._M_have_bmi2())
            return _S_init(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size), unsigned(__n)));
#endif
          // Generic fallback: (1 << n) - 1; the shift-count preconditions
          // follow from the documented __n < _S_size precondition.
          if constexpr (_S_size <= 32)
            {
              __glibcxx_simd_precondition(__n < 32, "invalid shift");
              return _S_init(_Bitmask<_S_size>((1u << unsigned(__n)) - 1));
            }
          else if constexpr (_S_size <= 64)
            {
              __glibcxx_simd_precondition(__n < 64, "invalid shift");
              return _S_init((1ull << unsigned(__n)) - 1);
            }
          else
            static_assert(false);
        }
    }
684
    /// @internal AND each element with its pair neighbor ((0,1), (2,3), ...),
    /// in place. Returns *this.
    [[__gnu__::__always_inline__]]
    constexpr basic_mask&
    _M_and_neighbors()
    {
      if constexpr (_S_use_bitmask)
        // 0x5555... selects even bit positions: each bit is combined with the
        // other bit of its pair (even bits see bit+1, odd bits see bit-1).
        _M_data &= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
                     | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
      else
        _M_data &= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
      return *this;
    }

    /// @internal OR each element with its pair neighbor ((0,1), (2,3), ...),
    /// in place. Returns *this.
    [[__gnu__::__always_inline__]]
    constexpr basic_mask&
    _M_or_neighbors()
    {
      if constexpr (_S_use_bitmask)
        _M_data |= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
                     | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
      else
        _M_data |= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
      return *this;
    }
708
    /** @internal
     * Split this mask into chunks of type _Mp: an array when _Mp::_S_size
     * divides _S_size evenly, otherwise a tuple whose last element is a
     * smaller mask holding the remaining elements.
     */
    template <typename _Mp>
    [[__gnu__::__always_inline__]]
    constexpr auto _M_chunk() const noexcept
    {
      constexpr int __n = _S_size / _Mp::_S_size;
      constexpr int __rem = _S_size % _Mp::_S_size;
      constexpr auto [...__is] = _IotaArray<__n>;
      if constexpr (__rem == 0)
        return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *this)...};
      else
        {
          using _Rest = resize_t<__rem, _Mp>;
          return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *this)...,
                       __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, *this));
        }
    }
725
    /// @internal Concatenation of a single mask is the identity.
    [[__gnu__::__always_inline__]]
    static constexpr const basic_mask&
    _S_concat(const basic_mask& __x0) noexcept
    { return __x0; }

    /// @internal Concatenate two or more masks of equal element size; their
    /// element counts must sum to _S_size.
    template <typename... _As>
    requires (sizeof...(_As) > 1)
    [[__gnu__::__always_inline__]]
    static constexpr basic_mask
    _S_concat(const basic_mask<_Bytes, _As>&... __xs) noexcept
    {
      static_assert(_S_size == (_As::_S_size + ...));
      return __extract_simd_at<basic_mask>(cw<0>, __xs...);
    }
740
    // [simd.mask.overview] default constructor -----------------------------
    basic_mask() = default;

    // [simd.mask.overview] conversion extensions ---------------------------
    // Implicit conversion from/to the internal vector-builtin representation;
    // only available for vec-masks (not scalar, not bitmask).
    [[__gnu__::__always_inline__]]
    constexpr
    basic_mask(_DataType __x) requires(!_S_is_scalar && !_S_use_bitmask)
    : _M_data(__x)
    {}

    [[__gnu__::__always_inline__]]
    constexpr
    operator _DataType() requires(!_S_is_scalar && !_S_use_bitmask)
    { return _M_data; }

    // [simd.mask.ctor] broadcast constructor -------------------------------
    // Broadcasts a single bool to all elements (padding stays false).
    [[__gnu__::__always_inline__]]
    constexpr explicit
    basic_mask(same_as<bool> auto __x) noexcept // LWG 4382.
    : _M_data(__x ? _S_implicit_mask : _DataType())
    {}
762
    // [simd.mask.ctor] conversion constructor ------------------------------
    // Converts from a mask with equal element count but possibly different
    // element size and/or ABI; explicitness is decided by
    // __is_mask_conversion_explicit.
    template <size_t _UBytes, typename _UAbi>
    requires (_S_size == _UAbi::_S_size)
    [[__gnu__::__always_inline__]]
    constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
    basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
    : _M_data([&] [[__gnu__::__always_inline__]] {
        using _UV = basic_mask<_UBytes, _UAbi>;
        // bool to bool
        if constexpr (_S_is_scalar)
          return __x[0];

        // converting from an "array of bool"
        else if constexpr (_UV::_S_is_scalar)
          {
            constexpr auto [...__is] = _IotaArray<_S_size>;
            if constexpr (_S_use_bitmask)
              return ((_DataType(__x[__is]) << __is) | ...);
            else
              return _DataType{__vec_value_type<_DataType>(-__x[__is])...};
          }

        // vec-/bit-mask to bit-mask | bit-mask to vec-mask
        else if constexpr (_S_use_bitmask || _UV::_S_use_bitmask)
          return basic_mask(__x.to_bitset())._M_data;

        // vec-mask to vec-mask
        else if constexpr (_Bytes == _UBytes)
          return _S_recursive_bit_cast(__x)._M_data;

        else
          {
#if _GLIBCXX_X86
            // TODO: turn this into a __vec_mask_cast overload in simd_x86.h
            if constexpr (_Bytes == 1 && _UBytes == 2)
              if (!__is_const_known(__x))
                {
                  if constexpr (_UAbi::_S_nreg == 1)
                    return __x86_cvt_vecmask<_DataType>(__x._M_data);
                  else if constexpr (_UAbi::_S_nreg == 2)
                    {
                      auto __lo = __x._M_data0._M_data;
                      auto __hi = __vec_zero_pad_to<sizeof(__lo)>(
                                    __x._M_data1._M_concat_data());
                      return __x86_cvt_vecmask<_DataType>(__lo, __hi);
                    }
                }
#endif
            // Generic element-size conversion of vec-masks.
            return __vec_mask_cast<_DataType>(__x._M_concat_data());
          }
      }())
    {}

    // Inherit the deleted (diagnosing) constructors from _MaskBase.
    using _Base::_MaskBase;
817
    // [simd.mask.ctor] generator constructor -------------------------------
    // Initializes element i to __gen(integral-constant i).
    template <__simd_generator_invokable<bool, _S_size> _Fp>
    [[__gnu__::__always_inline__]]
    constexpr explicit
    basic_mask(_Fp&& __gen)
    : _M_data([&] [[__gnu__::__always_inline__]] {
        constexpr auto [...__is] = _IotaArray<_S_size>;
        if constexpr (_S_is_scalar)
          return __gen(__simd_size_c<0>);
        else if constexpr (_S_use_bitmask)
          // Set bit i from the i-th generator invocation.
          return _DataType(((_DataType(__gen(__simd_size_c<__is>)) << __is)
                              | ...));
        else
          // Vec-mask elements are all-ones (-1) for true, 0 for false.
          return _DataType{__vec_value_type<_DataType>(
                             __gen(__simd_size_c<__is>) ? -1 : 0)...};
      }())
    {}
835
    // [simd.mask.ctor] bitset constructor ----------------------------------
    // Initializes element i to __b[i]; delegates to the uint constructor.
    [[__gnu__::__always_inline__]]
    constexpr
    basic_mask(const same_as<bitset<_S_size>> auto& __b) noexcept // LWG 4382.
    : basic_mask(static_cast<_Bitmask<_S_size>>(__b.to_ullong()))
    {
      // more than 64 elements in one register? not yet.
      static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
    }
845
    // [simd.mask.ctor] uint constructor ------------------------------------
    // Initializes element i from bit i of __val (see the quoted Effects
    // below): elements beyond __val's bit width are zero.
    template <unsigned_integral _Tp>
    requires (!same_as<_Tp, bool>) // LWG 4382.
    [[__gnu__::__always_inline__]]
    constexpr explicit
    basic_mask(_Tp __val) noexcept
    : _M_data([&] [[__gnu__::__always_inline__]] () {
        if constexpr (_S_use_bitmask)
          return __val;
        else if constexpr (_S_is_scalar)
          return bool(__val & 1);
        else if (__is_const_known(__val))
          {
            // Constant __val: build the vec-mask element-wise.
            constexpr auto [...__is] = _IotaArray<_S_size>;
            return _DataType {__vec_value_type<_DataType>((__val & (1ull << __is)) == 0
                                                            ? 0 : -1)...};
          }
        else
          {
            // Runtime __val: broadcast it, then test one bit per element.
            using _Ip = typename _VecType::value_type;
            _VecType __v0 = _Ip(__val);
            constexpr int __bits_per_element = sizeof(_Ip) * __CHAR_BIT__;
            constexpr _VecType __pow2 = _VecType(1) << (__iota<_VecType> % __bits_per_element);
            if constexpr (_S_size < __bits_per_element)
              return ((__v0 & __pow2) > 0)._M_concat_data();
            else if constexpr (_S_size == __bits_per_element)
              return ((__v0 & __pow2) != 0)._M_concat_data();
            else
              {
                // More elements than bits per element: first distribute byte
                // i of __val to elements [8i, 8i+8), then test bits.
                static_assert(_Bytes == 1);
                static_assert(sizeof(_Ip) == 1);
                _Bitmask<_S_size> __bits = __val;
                static_assert(sizeof(_VecType) % sizeof(__bits) == 0);
                if constexpr (sizeof(_DataType) == 32)
                  {
                    __vec_builtin_type<_UInt<8>, 4> __v1 = {
                      0xffu & (__bits >> (0 * __CHAR_BIT__)),
                      0xffu & (__bits >> (1 * __CHAR_BIT__)),
                      0xffu & (__bits >> (2 * __CHAR_BIT__)),
                      0xffu & (__bits >> (3 * __CHAR_BIT__)),
                    };
                    // Replicate each byte into 8 consecutive byte lanes.
                    __v1 *= 0x0101'0101'0101'0101ull;
                    __v0 = __builtin_bit_cast(_VecType, __v1);
                    return ((__v0 & __pow2) != 0)._M_data;
                  }
                else
                  {
                    using _V1 = vec<_Ip, sizeof(__bits)>;
                    _V1 __v1 = __builtin_bit_cast(_V1, __bits);
                    __v0 = _VecType::_S_static_permute(__v1, [](int __i) {
                             return __i / __CHAR_BIT__;
                           });
                    return ((__v0 & __pow2) != 0)._M_data;
                  }
              }
          }
      }())
    {}
904
905 //Effects: Initializes the first M elements to the corresponding bit values in val, where M is
906 //the smaller of size() and the number of bits in the value representation
907 //([basic.types.general]) of the type of val. If M is less than size(), the remaining elements
908 //are initialized to zero.
909
910
911 // [simd.mask.subscr] ---------------------------------------------------
912 [[__gnu__::__always_inline__]]
913 constexpr value_type
914 operator[](__simd_size_type __i) const
915 {
916 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size, "subscript is out of bounds");
917 if constexpr (_S_is_scalar)
918 return _M_data;
919 else if constexpr (_S_use_bitmask)
920 return bool((_M_data >> __i) & 1);
921 else
922 return _M_data[__i] & 1;
923 }
924
    // [simd.mask.unary] ----------------------------------------------------
    /// Element-wise logical negation (complement of the representation).
    [[__gnu__::__always_inline__]]
    constexpr basic_mask
    operator!() const noexcept
    {
      if constexpr (_S_is_scalar)
        return _S_init(!_M_data);
      else
        return _S_init(~_M_data);
    }

    /// Conversion to the matching integer vec: true -> 1, false -> 0.
    [[__gnu__::__always_inline__]]
    constexpr _VecType
    operator+() const noexcept requires destructible<_VecType>
    { return operator _VecType(); }

    // Fallback overload, selected when no valid _VecType exists.
    constexpr _VecType
    operator+() const noexcept = delete;
943
      // Unary minus: true -> _Ip(-1), false -> _Ip(0).
      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator-() const noexcept requires destructible<_VecType>
      {
        using _Ip = typename _VecType::value_type;
        if constexpr (_S_is_scalar)
          return _Ip(-int(_M_data));
        else if constexpr (_S_use_bitmask)
          return __select_impl(*this, _Ip(-1), _Ip());
        else
          {
            // vector-mask lanes are already 0 / ~0, i.e. exactly -false / -true
            static_assert(sizeof(_VecType) == sizeof(_M_data));
            return __builtin_bit_cast(_VecType, _M_data);
          }
      }

      constexpr _VecType
      operator-() const noexcept = delete;
962
      // Unary complement of the vec conversion values: true -> ~1 == -2, false -> ~0 == -1.
      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator~() const noexcept requires destructible<_VecType>
      {
        using _Ip = typename _VecType::value_type;
        if constexpr (_S_is_scalar)
          return _Ip(~int(_M_data));
        else if constexpr (_S_use_bitmask)
          return __select_impl(*this, _Ip(-2), _Ip(-1));
        else
          {
            // bit_cast yields 0 / -1 per lane; subtracting 1 gives -1 / -2
            static_assert(sizeof(_VecType) == sizeof(_M_data));
            return __builtin_bit_cast(_VecType, _M_data) - _Ip(1);
          }
      }

      constexpr _VecType
      operator~() const noexcept = delete;
981
      // [simd.mask.conv] -----------------------------------------------------
      // Convert to a vec of equal size: true -> 1, false -> 0. The conversion
      // is implicit only when the element sizes match (sizeof(_Up) == _Bytes).
      template <typename _Up, typename _UAbi>
        requires (_UAbi::_S_size == _S_size)
        [[__gnu__::__always_inline__]]
        constexpr explicit(sizeof(_Up) != _Bytes)
        operator basic_vec<_Up, _UAbi>() const noexcept
        {
          if constexpr (_S_is_scalar)
            return _Up(_M_data);
          else
            {
              using _UV = basic_vec<_Up, _UAbi>;
              return __select_impl(static_cast<_UV::mask_type>(*this), _UV(1), _UV(0));
            }
        }

      using _Base::operator basic_vec;
999
1000 // [simd.mask.namedconv] ------------------------------------------------
1001 [[__gnu__::__always_inline__]]
1002 constexpr bitset<_S_size>
1003 to_bitset() const noexcept
1004 {
1005 // more than 64 elements in one register? not yet.
1006 static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
1007 return to_ullong();
1008 }
1009
      /** @internal
       * Return the mask as the smallest possible unsigned integer (up to 64 bits).
       *
       * @tparam _Offset Adjust the return type & value to start at bit @p _Offset.
       * @tparam _Traits Architecture traits for choosing the most efficient x86
       *   sequence (e.g. availability of BMI2).
       */
      template <int _Offset = 0, _ArchTraits _Traits = {}>
        [[__gnu__::__always_inline__]]
        constexpr _Bitmask<_S_size + _Offset>
        _M_to_uint() const
        {
          constexpr int __nbits = _S_size;
          static_assert(__nbits + _Offset <= numeric_limits<unsigned long long>::digits);
          // before shifting
          using _U0 = _Bitmask<__nbits>;
          // potentially wider type needed for shift by _Offset
          using _Ur = _Bitmask<__nbits + _Offset>;
          if constexpr (_S_is_scalar || _S_use_bitmask)
            {
              // already stored as bool/integer: clear padding bits, then shift
              auto __bits = _M_data;
              if constexpr (_S_is_partial)
                __bits &= _S_implicit_mask;
              return _Ur(__bits) << _Offset;
            }
          else
            {
#if _GLIBCXX_X86
              // runtime values: use movmsk to gather the lane sign bits
              if (!__is_const_known(*this))
                {
                  _U0 __uint;
                  if constexpr (_Bytes != 2) // movmskb would duplicate each bit
                    __uint = _U0(__x86_movmsk(_M_data));
                  else if constexpr (_Bytes == 2 && _Traits._M_have_bmi2())
                    __uint = __bit_extract_even<__nbits>(__x86_movmsk(_M_data));
                  else if constexpr (_Bytes == 2)
                    // without BMI2, drop the duplicated bits via a char-sized mask
                    return __similar_mask<char, __nbits, _Ap>(*this).template _M_to_uint<_Offset>();
                  else
                    static_assert(false);
                  // TODO: with AVX512 use __builtin_ia32_cvt[bwdq]2mask(128|256|512)
                  // TODO: Ask for compiler builtin to do the best of the above. This should also
                  // combine with a preceding vector-mask compare to produce a bit-mask compare (on
                  // AVX512)
                  if constexpr (_S_is_partial)
                    __uint &= (_U0(1) << _S_size) - 1;
                  return _Ur(__uint) << _Offset;
                }
#endif
              // constant-evaluation / generic fallback
              using _IV = _VecType;
              static_assert(destructible<_IV>);
              const typename _IV::mask_type& __k = [&] [[__gnu__::__always_inline__]] () {
                if constexpr (is_same_v<typename _IV::mask_type, basic_mask>)
                  return *this;
                else
                  return typename _IV::mask_type(*this);
              }();
              constexpr int __n = _IV::size();
              if constexpr (_Bytes * __CHAR_BIT__ >= __n) // '1 << __iota' cannot overflow
                { // reduce(select(k, powers_of_2, 0))
                  constexpr _IV __pow2 = _IV(1) << __iota<_IV>;
                  return _Ur(_U0(__select_impl(__k, __pow2, _IV())
                                   ._M_reduce(bit_or<>()))) << _Offset;
                }
              else if constexpr (__n % __CHAR_BIT__ != 0)
                { // recurse after splitting in two
                  constexpr int __n_lo = __n - __n % __CHAR_BIT__;
                  const auto [__lo, __hi] = chunk<__n_lo>(__k);
                  _Ur __bits = __hi.template _M_to_uint<_Offset + __n_lo>();
                  return __bits | __lo.template _M_to_uint<_Offset>();
                }
              else
                { // limit powers_of_2 to 1, 2, 4, ..., 128
                  constexpr _IV __pow2 = _IV(1) << (__iota<_IV> % _IV(__CHAR_BIT__));
                  _IV __x = __select_impl(__k, __pow2, _IV());
                  // partial reductions of 8 neighboring elements
                  __x |= _IV::_S_static_permute(__x, _SwapNeighbors<4>());
                  __x |= _IV::_S_static_permute(__x, _SwapNeighbors<2>());
                  __x |= _IV::_S_static_permute(__x, _SwapNeighbors<1>());
                  // permute partial reduction results to the front
                  __x = _IV::_S_static_permute(__x, [](int __i) {
                          return __i * 8 < __n ? __i * 8 : uninit_element;
                        });
                  // extract front as scalar unsigned
                  _U0 __bits = __builtin_bit_cast(
                                 __similar_vec<_U0, __n * _Bytes / sizeof(_U0), _Ap>, __x)[0];
                  // mask off unused bits
                  if constexpr (!__has_single_bit(unsigned(__nbits)))
                    __bits &= (_U0(1) << __nbits) - 1;
                  return _Ur(__bits) << _Offset;
                }
            }
        }
1102
      // Return the mask bits as an unsigned long long (bit i == element i).
      [[__gnu__::__always_inline__]]
      constexpr unsigned long long
      to_ullong() const
      { return _M_to_uint(); }
1107
      // [simd.mask.binary] ---------------------------------------------------
      // For mask representations (bool, bitmask, vector-mask) the logical
      // operators coincide with the bitwise ones on the raw data.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator&&(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data & __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator||(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data | __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator&(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data & __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator|(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data | __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator^(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data ^ __y._M_data); }
1133
      // [simd.mask.cassign] --------------------------------------------------
      // Compound assignment directly on the underlying representation.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator&=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data &= __y._M_data;
        return __x;
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator|=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data |= __y._M_data;
        return __x;
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator^=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data ^= __y._M_data;
        return __x;
      }
1158
      // [simd.mask.comparison] -----------------------------------------------
      // Element-wise comparisons expressed via Boolean identities:
      // a == b <=> !(a ^ b), a >= b <=> a || !b, a > b <=> a && !b, etc.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator==(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !(__x ^ __y); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator!=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x ^ __y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator>=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x || !__y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator<=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !__x || __y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator>(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x && !__y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator<(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !__x && __y; }
1189
      // [simd.mask.cond] -----------------------------------------------------
      // Element-wise selection between two masks: __k[i] ? __t[i] : __f[i].
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      __select_impl(const basic_mask& __k, const basic_mask& __t, const basic_mask& __f) noexcept
      {
        if constexpr (!_S_use_bitmask)
          {
#if _GLIBCXX_X86
            // this works around bad code-gen when the compiler can't see that __k is a vector-mask.
            // This pattern, is recognized to match the x86 blend instructions, which only consider
            // the sign bit of the mask register. Also, without SSE4, if the compiler knows that __k
            // is a vector-mask, then the '< 0' is elided.
            return __k._M_data < 0 ? __t._M_data : __f._M_data;
#endif
            return __k._M_data ? __t._M_data : __f._M_data;
          }
        else
          // classic bit-blend on the integer bitmask
          return (__k._M_data & __t._M_data) | (~__k._M_data & __f._M_data);
      }
1209
1210 [[__gnu__::__always_inline__]]
1211 friend constexpr basic_mask
1212 __select_impl(const basic_mask& __k, same_as<bool> auto __t, same_as<bool> auto __f) noexcept
1213 {
1214 if (__t == __f)
1215 return basic_mask(__t);
1216 else
1217 return __t ? __k : !__k;
1218 }
1219
      // Selection with scalar alternatives of matching element size; broadcasts
      // both values into a vec and defers to the vec overload.
      template <__vectorizable _T0, same_as<_T0> _T1>
        requires (sizeof(_T0) == _Bytes)
        [[__gnu__::__always_inline__]]
        friend constexpr vec<_T0, _S_size>
        __select_impl(const basic_mask& __k, const _T0& __t, const _T1& __f) noexcept
        {
          if constexpr (_S_is_scalar)
            return __k._M_data ? __t : __f;
          else
            {
              using _Vp = vec<_T0, _S_size>;
              using _Mp = typename _Vp::mask_type;
              return __select_impl(_Mp(__k), _Vp(__t), _Vp(__f));
            }
        }
1235
      // [simd.mask.reductions] implementation --------------------------------
      // True iff every element is set.
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_all_of() const noexcept
      {
        if constexpr (_S_is_scalar)
          return _M_data;
        else if constexpr (_S_use_bitmask)
          {
            if constexpr (_S_is_partial)
              // PR120925 (partial kortest pattern not recognized)
              return (_M_data & _S_implicit_mask) == _S_implicit_mask;
            else
              return _M_data == _S_implicit_mask;
          }
#if _GLIBCXX_X86
        else if (!__is_const_known(_M_data))
          return __x86_vecmask_all<_S_size>(_M_data);
#endif
        else
          return _VecOps<_DataType, _S_size>::_S_all_of(_M_data);
      }

      // True iff at least one element is set.
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_any_of() const noexcept
      {
        if constexpr (_S_is_scalar)
          return _M_data;
        else if constexpr (_S_use_bitmask)
          {
            if constexpr (_S_is_partial)
              // PR120925 (partial kortest pattern not recognized)
              return (_M_data & _S_implicit_mask) != 0;
            else
              return _M_data != 0;
          }
#if _GLIBCXX_X86
        else if (!__is_const_known(_M_data))
          return __x86_vecmask_any<_S_size>(_M_data);
#endif
        else
          return _VecOps<_DataType, _S_size>::_S_any_of(_M_data);
      }

      // True iff no element is set.
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_none_of() const noexcept
      {
        if constexpr (_S_is_scalar)
          return !_M_data;
        else if constexpr (_S_use_bitmask)
          {
            if constexpr (_S_is_partial)
              // PR120925 (partial kortest pattern not recognized)
              return (_M_data & _S_implicit_mask) == 0;
            else
              return _M_data == 0;
          }
#if _GLIBCXX_X86
        else if (!__is_const_known(_M_data))
          return __x86_vecmask_none<_S_size>(_M_data);
#endif
        else
          return _VecOps<_DataType, _S_size>::_S_none_of(_M_data);
      }
1302
      // Number of true elements: popcount of the bit representation.
      [[__gnu__::__always_inline__]]
      constexpr __simd_size_type
      _M_reduce_count() const noexcept
      {
        if constexpr (_S_is_scalar)
          return int(_M_data);
        else if constexpr (_S_size <= numeric_limits<unsigned>::digits)
          return __builtin_popcount(_M_to_uint());
        else
          return __builtin_popcountll(to_ullong());
      }
1314
      // Index of the first true element (precondition: any_of()).
      [[__gnu__::__always_inline__]]
      constexpr __simd_size_type
      _M_reduce_min_index() const
      {
        const auto __bits = _M_to_uint();
        __glibcxx_simd_precondition(__bits, "An empty mask does not have a min_index.");
        if constexpr (_S_size == 1)
          return 0;
        else
          return __countr_zero(__bits);
      }

      // Index of the last true element (precondition: any_of()).
      [[__gnu__::__always_inline__]]
      constexpr __simd_size_type
      _M_reduce_max_index() const
      {
        const auto __bits = _M_to_uint();
        __glibcxx_simd_precondition(__bits, "An empty mask does not have a max_index.");
        if constexpr (_S_size == 1)
          return 0;
        else
          return __highest_bit(__bits);
      }
1338
      // @internal True iff the compiler can see __x's value at compile time.
      [[__gnu__::__always_inline__]]
      friend constexpr bool
      __is_const_known(const basic_mask& __x)
      { return __builtin_constant_p(__x._M_data); }
1343 };
1344
  // Partial specialization for masks spanning more than one register:
  // recursively composed of a low part (_M_data0, _N0 elements, a power of
  // two) and a high part (_M_data1, the remaining _N1 elements).
  template <size_t _Bytes, __abi_tag _Ap>
    requires (_Ap::_S_nreg > 1)
    class basic_mask<_Bytes, _Ap>
    : public _MaskBase<_Bytes, _Ap>
    {
      using _Base = _MaskBase<_Bytes, _Ap>;

      using _VecType = _Base::_VecType;

      template <size_t, typename>
        friend class basic_mask;

      template <typename, typename>
        friend class basic_vec;

      static constexpr int _S_size = _Ap::_S_size;

      // split point: largest power of two below _S_size
      static constexpr int _N0 = __bit_ceil(unsigned(_S_size)) / 2;

      static constexpr int _N1 = _S_size - _N0;

      static constexpr int _Nreg0 = __bit_ceil(unsigned(_Ap::_S_nreg)) / 2;

      static constexpr int _Nreg1 = _Ap::_S_nreg - _Nreg0;

      // explicitly request _Nreg0 rather than use __abi_rebind. This way _Float16 can use half
      // of native registers (since they convert to full float32 registers).
      using _Abi0 = decltype(_Ap::template _S_resize<_N0, _Nreg0>());

      using _Abi1 = decltype(_Ap::template _S_resize<_N1, _Nreg1>());

      using _Mask0 = basic_mask<_Bytes, _Abi0>;

      // the implementation (and users) depend on elements being contiguous in memory
      static_assert(_Mask0::_S_padding_bytes == 0 && !_Mask0::_S_is_partial);

      using _Mask1 = basic_mask<_Bytes, _Abi1>;

      static constexpr bool _S_is_partial = _Mask1::_S_is_partial;

      // _Ap::_S_nreg determines how deep the recursion goes. E.g. basic_mask<4, _Abi<8, 4>> cannot
      // use basic_mask<4, _Abi<4, 1>> as _Mask0/1 types.
      static_assert(_Mask0::abi_type::_S_nreg + _Mask1::abi_type::_S_nreg == _Ap::_S_nreg);

      static constexpr bool _S_use_bitmask = _Mask0::_S_use_bitmask;

      static constexpr bool _S_is_scalar = _Mask0::_S_is_scalar;

      // low part: first _N0 elements
      _Mask0 _M_data0;

      // high part: remaining _N1 elements
      _Mask1 _M_data1;

      static constexpr bool _S_has_bool_member = _Mask1::_S_has_bool_member;

      // by construction _N0 >= _N1
      // => sizeof(_Mask0) >= sizeof(_Mask1)
      // and __alignof__(_Mask0) >= __alignof__(_Mask1)
      static constexpr size_t _S_padding_bytes
        = (__alignof__(_Mask0) == __alignof__(_Mask1)
             ? 0 : __alignof__(_Mask0) - (sizeof(_Mask1) % __alignof__(_Mask0)))
          + _Mask1::_S_padding_bytes;

    public:
      using value_type = bool;

      using abi_type = _Ap;

      using iterator = _Base::iterator;

      using const_iterator = _Base::const_iterator;
1415
      // @internal Construct from the two half masks.
      [[__gnu__::__always_inline__]]
      static constexpr basic_mask
      _S_init(const _Mask0& __x, const _Mask1& __y)
      {
        basic_mask __r;
        __r._M_data0 = __x;
        __r._M_data1 = __y;
        return __r;
      }

      // @internal Construct from an integer bitmask.
      [[__gnu__::__always_inline__]]
      static constexpr basic_mask
      _S_init(unsigned_integral auto __bits)
      { return basic_mask(__bits); }

      // @internal Construct from a pair of per-half representations, each of
      // which may be either an unsigned bitmask or native half-mask data.
      template <typename _U0, typename _U1>
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_init(const __trivial_pair<_U0, _U1>& __bits)
        {
          if constexpr (is_unsigned_v<_U0>)
            {
              static_assert(is_unsigned_v<_U1>);
              return _S_init(_Mask0(__bits._M_first), _Mask1(__bits._M_second));
            }
          else if constexpr (is_unsigned_v<_U1>)
            return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1(__bits._M_second));
          else
            return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1::_S_init(__bits._M_second));
        }
1446
      // @internal Access the low half (elements [0, _N0)).
      [[__gnu__::__always_inline__]]
      constexpr const _Mask0&
      _M_get_low() const
      { return _M_data0; }

      // @internal Access the high half (elements [_N0, _S_size)).
      [[__gnu__::__always_inline__]]
      constexpr const _Mask1&
      _M_get_high() const
      { return _M_data1; }
1456
      // @internal Reinterpret another mask's storage as this mask type,
      // recursing per half when a flat bit_cast is not valid (bool members or
      // padding would make byte identity meaningless).
      template <size_t _UBytes, typename _UAbi>
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_recursive_bit_cast(const basic_mask<_UBytes, _UAbi>& __x)
        {
          using _Mp = basic_mask<_UBytes, _UAbi>;
          if constexpr (_Mp::_S_has_bool_member || sizeof(basic_mask) > sizeof(__x)
                          || _Mp::_S_padding_bytes != 0)
            return _S_init(__builtin_bit_cast(_Mask0, __x._M_data0),
                           _Mask1::_S_recursive_bit_cast(__x._M_data1));
          else if constexpr (sizeof(basic_mask) == sizeof(__x))
            return __builtin_bit_cast(basic_mask, __x);
          else
            { // e.g. on IvyBridge (different alignment => different sizeof)
              struct _Tmp { alignas(_Mp) basic_mask _M_data; };
              return __builtin_bit_cast(_Tmp, __x)._M_data;
            }
        }
1475
      // @internal Concatenate both halves into a single raw value: one integer
      // bitmask for bitmask storage, otherwise one (zero-padded) vector.
      [[__gnu__::__always_inline__]]
      constexpr auto
      _M_concat_data(bool __do_sanitize = _S_is_partial) const
      {
        if constexpr (_S_use_bitmask)
          {
            static_assert(_S_size <= numeric_limits<unsigned long long>::digits,
                          "cannot concat more than 64 bits");
            using _Up = _Bitmask<_S_size>;
            return _Up(_M_data0._M_concat_data() | (_Up(_M_data1._M_concat_data(__do_sanitize)) << _N0));
          }
        else
          {
            auto __lo = _M_data0._M_concat_data();
            auto __hi = __vec_zero_pad_to<sizeof(__lo)>(_M_data1._M_concat_data(__do_sanitize));
            return __vec_concat(__lo, __hi);
          }
      }
1494
      // @internal Mask with the first __n elements true, the rest false.
      template <_ArchTraits _Traits = {}>
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_partial_mask_of_n(int __n)
        {
#if __has_builtin(__builtin_ia32_bzhi_di)
          // BMI2: single instruction zeroes all bits at positions >= __n
          if constexpr (_S_use_bitmask && _S_size <= 64 && _Traits._M_have_bmi2())
            return basic_mask(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size), unsigned(__n)));
#endif
          if constexpr (_N0 == 1)
            {
              static_assert(_S_size == 2); // => __n == 1
              return _S_init(_Mask0(true), _Mask1(false));
            }
          else if (__n < _N0)
            return _S_init(_Mask0::_S_partial_mask_of_n(__n), _Mask1(false));
          else if (__n == _N0 || _N1 == 1)
            return _S_init(_Mask0(true), _Mask1(false));
          else if constexpr (_N1 != 1)
            // recurse into the high half for the remaining __n - _N0 elements
            return _S_init(_Mask0(true), _Mask1::_S_partial_mask_of_n(__n - _N0));
        }
1516
      // @internal AND each even/odd pair of elements, per half.
      [[__gnu__::__always_inline__]]
      constexpr basic_mask&
      _M_and_neighbors()
      {
        _M_data0._M_and_neighbors();
        _M_data1._M_and_neighbors();
        return *this;
      }

      // @internal OR each even/odd pair of elements, per half.
      [[__gnu__::__always_inline__]]
      constexpr basic_mask&
      _M_or_neighbors()
      {
        _M_data0._M_or_neighbors();
        _M_data1._M_or_neighbors();
        return *this;
      }
1534
      // @internal Split into _Mp-sized pieces: an array when _S_size divides
      // evenly, otherwise a tuple whose last element holds the remainder.
      template <typename _Mp>
        [[__gnu__::__always_inline__]]
        constexpr auto
        _M_chunk() const noexcept
        {
          constexpr int __n = _S_size / _Mp::_S_size;
          constexpr int __rem = _S_size % _Mp::_S_size;
          constexpr auto [...__is] = _IotaArray<__n>;
          if constexpr (__rem == 0)
            return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>,
                                                          _M_data0, _M_data1)...};
          else
            {
              using _Rest = resize_t<__rem, _Mp>;
              return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, _M_data0, _M_data1)...,
                           __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, _M_data0, _M_data1));
            }
        }
1553
      // @internal Concatenation of a single equally-sized mask is the identity.
      [[__gnu__::__always_inline__]]
      static constexpr basic_mask
      _S_concat(const basic_mask& __x0) noexcept
      { return __x0; }

      // @internal Concatenate several smaller masks into one; the halves are
      // extracted at element offsets 0 and _N0.
      template <typename... _As>
        requires (sizeof...(_As) >= 2)
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_concat(const basic_mask<_Bytes, _As>&... __xs) noexcept
        {
          static_assert(_S_size == (_As::_S_size + ...));
          return _S_init(__extract_simd_at<_Mask0>(cw<0>, __xs...),
                         __extract_simd_at<_Mask1>(cw<_N0>, __xs...));
        }
1569
      // [simd.mask.overview] default constructor -----------------------------
      basic_mask() = default;

      // [simd.mask.overview] conversion extensions ---------------------------
      // TODO: any?

      // [simd.mask.ctor] broadcast constructor -------------------------------
      // Set all elements of both halves to __x.
      [[__gnu__::__always_inline__]]
      constexpr explicit
      basic_mask(same_as<bool> auto __x) noexcept // LWG 4382.
      : _M_data0(__x), _M_data1(__x)
      {}
1582
      // [simd.mask.ctor] conversion constructor ------------------------------
      // Convert from a mask with different element size. If the source is also
      // split into two registers, convert half by half; otherwise chunk it.
      template <size_t _UBytes, typename _UAbi>
        requires (_S_size == _UAbi::_S_size)
        [[__gnu__::__always_inline__]]
        constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
        basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
        : _M_data0([&] {
            if constexpr (_UAbi::_S_nreg > 1)
              {
                return __x._M_data0;
              }
            else if constexpr (_N0 == 1)
              return _Mask0(__x[0]);
            else
              return get<0>(chunk<_N0>(__x));
          }()),
        _M_data1([&] {
            if constexpr (_UAbi::_S_nreg > 1)
              {
                return __x._M_data1;
              }
            else if constexpr (_N1 == 1)
              return _Mask1(__x[_N0]);
            else
              return get<1>(chunk<_N0>(__x));
          }())
        {}

      using _Base::_MaskBase;
1612
      // [simd.mask.ctor] generator constructor -------------------------------
      // Invoke __gen with each index; the high half sees indices offset by _N0.
      template <__simd_generator_invokable<bool, _S_size> _Fp>
        [[__gnu__::__always_inline__]]
        constexpr explicit
        basic_mask(_Fp&& __gen)
        : _M_data0(__gen), _M_data1([&] [[__gnu__::__always_inline__]] (auto __i) {
            return __gen(__simd_size_c<__i + _N0>);
          })
        {}

      // [simd.mask.ctor] bitset constructor ----------------------------------
      // Split the bitset at _N0 and initialize each half from its part.
      [[__gnu__::__always_inline__]]
      constexpr
      basic_mask(const same_as<bitset<_S_size>> auto& __b) noexcept // LWG 4382.
      : _M_data0(__bitset_split<_N0>(__b)._M_lo), _M_data1(__bitset_split<_N0>(__b)._M_hi)
      {}

      // [simd.mask.ctor] uint constructor ------------------------------------------
      // Low half takes the low _N0 bits; the shift for the high half is only
      // performed when __val is wide enough for it to be well-defined.
      template <unsigned_integral _Tp>
        requires (!same_as<_Tp, bool>) // LWG 4382.
        [[__gnu__::__always_inline__]]
        constexpr explicit
        basic_mask(_Tp __val) noexcept
        : _M_data0(static_cast<_Bitmask<_N0>>(__val)),
          _M_data1(sizeof(_Tp) * __CHAR_BIT__ > _N0
                     ? static_cast<_Bitmask<_N1>>(__val >> _N0) : _Bitmask<_N1>())
        {}
1640
      // [simd.mask.subscr] ---------------------------------------------------
      // Read element __i. For constant indices (and when a bool member forbids
      // byte aliasing) dispatch to the owning half; otherwise read through an
      // aliasing pointer to avoid branching on __i.
      [[__gnu__::__always_inline__]]
      constexpr value_type
      operator[](__simd_size_type __i) const
      {
        __glibcxx_simd_precondition(__i >= 0 && __i < _S_size, "subscript is out of bounds");
        if (__is_const_known(__i))
          return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
        else if constexpr (_M_data1._S_has_bool_member)
          // in some cases the last element can be 'bool' instead of bit-/vector-mask;
          // e.g. mask<short, 17> is {mask<short, 16>, mask<short, 1>}, where the latter uses
          // _ScalarAbi<1>, which is stored as 'bool'
          return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
        else if constexpr (abi_type::_S_is_bitmask)
          {
            using _AliasingByte [[__gnu__::__may_alias__]] = unsigned char;
            return bool((reinterpret_cast<const _AliasingByte*>(this)
                           [__i / __CHAR_BIT__] >> (__i % __CHAR_BIT__)) & 1);
          }
        else
          {
            using _AliasingInt [[__gnu__::__may_alias__]] = __integer_from<_Bytes>;
            return reinterpret_cast<const _AliasingInt*>(this)[__i] != 0;
          }
      }
1666
      // [simd.mask.unary] ----------------------------------------------------
      // All unary operators apply per half and recombine.
      [[__gnu__::__always_inline__]]
      constexpr basic_mask
      operator!() const noexcept
      { return _S_init(!_M_data0, !_M_data1); }

      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator+() const noexcept requires destructible<_VecType>
      { return _VecType::_S_concat(+_M_data0, +_M_data1); }

      constexpr _VecType
      operator+() const noexcept = delete;

      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator-() const noexcept requires destructible<_VecType>
      { return _VecType::_S_concat(-_M_data0, -_M_data1); }

      constexpr _VecType
      operator-() const noexcept = delete;

      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator~() const noexcept requires destructible<_VecType>
      { return _VecType::_S_concat(~_M_data0, ~_M_data1); }

      constexpr _VecType
      operator~() const noexcept = delete;
1696
      // [simd.mask.conv] -----------------------------------------------------
      // Convert to a vec of equal size by converting each half to the target's
      // per-half data type.
      template <typename _Up, typename _UAbi>
        requires (_UAbi::_S_size == _S_size)
        [[__gnu__::__always_inline__]]
        constexpr explicit(sizeof(_Up) != _Bytes)
        operator basic_vec<_Up, _UAbi>() const noexcept
        {
          using _Rp = basic_vec<_Up, _UAbi>;
          return _Rp::_S_init(static_cast<_Rp::_DataType0>(_M_data0),
                              static_cast<_Rp::_DataType1>(_M_data1));
        }

      using _Base::operator basic_vec;
1710
      // [simd.mask.namedconv] ------------------------------------------------
      // Copy all mask bits into a bitset; for > 64 elements bit_cast the two
      // halves' bitsets, which requires the split to fall on a word boundary.
      [[__gnu__::__always_inline__]]
      constexpr bitset<_S_size>
      to_bitset() const noexcept
      {
        if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
          return to_ullong();
        else
          {
            static_assert(_N0 % numeric_limits<unsigned long long>::digits == 0);
            struct _Tmp
            {
              bitset<_N0> _M_lo;
              bitset<_N1> _M_hi;
            } __tmp = {_M_data0.to_bitset(), _M_data1.to_bitset()};
            return __builtin_bit_cast(bitset<_S_size>, __tmp);
          }
      }
1729
      // @internal Mask bits as unsigned integer(s): a __trivial_pair of the two
      // halves' values when the low half alone needs 64 bits, otherwise one
      // integer with the high half's bits shifted up by _N0.
      template <int _Offset = 0, _ArchTraits _Traits = {}>
        [[__gnu__::__always_inline__]]
        constexpr auto
        _M_to_uint() const
        {
          constexpr int _N0x = _N0;
          if constexpr (_N0x >= numeric_limits<unsigned long long>::digits)
            {
              static_assert(_Offset == 0);
              return __trivial_pair {
                _M_data0.template _M_to_uint<0>(),
                _M_data1.template _M_to_uint<0>()
              };
            }
          else
            {
#if _GLIBCXX_X86
              // without BMI2 a 2-byte vector-mask pair converts via char masks
              if constexpr (_Bytes == 2 && !_Traits._M_have_bmi2() && _Ap::_S_nreg == 2
                              && !_S_use_bitmask)
                return __similar_mask<char, _S_size, _Ap>(*this).template _M_to_uint<_Offset>();
#endif
              auto __uint = _M_data1.template _M_to_uint<_N0x + _Offset>();
              __uint |= _M_data0.template _M_to_uint<_Offset>();
              return __uint;
            }
        }
1756
      // Mask bits as unsigned long long; precondition for > 64 elements: no
      // true element at an index that a ullong cannot represent.
      [[__gnu__::__always_inline__]]
      constexpr unsigned long long
      to_ullong() const
      {
        if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
          return _M_to_uint();
        else
          {
            __glibcxx_simd_precondition(_M_data1.to_ullong() == 0,
                                        "to_ullong called on mask with 'true' elements at indices"
                                        "higher than representable in a ullong");
            return _M_data0.to_ullong();
          }
      }
1771
      // [simd.mask.binary]
      // All binary operators apply per half and recombine.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator&&(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data0 && __y._M_data0, __x._M_data1 && __y._M_data1); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator||(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data0 || __y._M_data0, __x._M_data1 || __y._M_data1); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator&(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data0 & __y._M_data0, __x._M_data1 & __y._M_data1); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator|(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data0 | __y._M_data0, __x._M_data1 | __y._M_data1); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator^(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data0 ^ __y._M_data0, __x._M_data1 ^ __y._M_data1); }
1797
      // [simd.mask.cassign]
      // Compound assignment applies per half in place.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator&=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data0 &= __y._M_data0;
        __x._M_data1 &= __y._M_data1;
        return __x;
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator|=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data0 |= __y._M_data0;
        __x._M_data1 |= __y._M_data1;
        return __x;
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator^=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data0 ^= __y._M_data0;
        __x._M_data1 ^= __y._M_data1;
        return __x;
      }
1825
      // [simd.mask.comparison] -----------------------------------------------
      // Same Boolean identities as in the single-register specialization.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator==(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !(__x ^ __y); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator!=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x ^ __y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator>=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x || !__y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator<=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !__x || __y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator>(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x && !__y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator<(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !__x && __y; }
1856
      // [simd.mask.cond] -----------------------------------------------------
      // Element-wise selection between two masks, per half.
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      __select_impl(const basic_mask& __k, const basic_mask& __t, const basic_mask& __f) noexcept
      {
        return _S_init(__select_impl(__k._M_data0, __t._M_data0, __f._M_data0),
                       __select_impl(__k._M_data1, __t._M_data1, __f._M_data1));
      }
1865
1866 [[__gnu__::__always_inline__]]
1867 friend constexpr basic_mask
1868 __select_impl(const basic_mask& __k, same_as<bool> auto __t, same_as<bool> auto __f) noexcept
1869 {
1870 if (__t == __f)
1871 return basic_mask(__t);
1872 else
1873 return __t ? __k : !__k;
1874 }
1875
      // Selection with scalar alternatives: either convert the mask to the
      // result's own mask type, or blend per half when this already is it.
      template <__vectorizable _T0, same_as<_T0> _T1>
        requires (sizeof(_T0) == _Bytes)
        [[__gnu__::__always_inline__]]
        friend constexpr vec<_T0, _S_size>
        __select_impl(const basic_mask& __k, const _T0& __t, const _T1& __f) noexcept
        {
          using _Vp = vec<_T0, _S_size>;
          if constexpr (!is_same_v<basic_mask, typename _Vp::mask_type>)
            return __select_impl(static_cast<_Vp::mask_type>(__k), __t, __f);
          else
            return _Vp::_S_init(__select_impl(__k._M_data0, __t, __f),
                                __select_impl(__k._M_data1, __t, __f));
        }
1889
1890 template <_ArchTraits _Traits = {}>
1891 [[__gnu__::__always_inline__]]
1892 constexpr bool
1893 _M_all_of() const
1894 {
1895 if constexpr (_N0 == _N1)
1896 return (_M_data0 && _M_data1)._M_all_of();
1897 else
1898 return _M_data0._M_all_of() && _M_data1._M_all_of();
1899 }
1900
1901 template <_ArchTraits _Traits = {}>
1902 [[__gnu__::__always_inline__]]
1903 constexpr bool
1904 _M_any_of() const
1905 {
1906 if constexpr (_N0 == _N1)
1907 return (_M_data0 || _M_data1)._M_any_of();
1908 else
1909 return _M_data0._M_any_of() || _M_data1._M_any_of();
1910 }
1911
1912 template <_ArchTraits _Traits = {}>
1913 [[__gnu__::__always_inline__]]
1914 constexpr bool
1915 _M_none_of() const
1916 {
1917 if constexpr (_N0 == _N1)
1918 return (_M_data0 || _M_data1)._M_none_of();
1919 else
1920 return _M_data0._M_none_of() && _M_data1._M_none_of();
1921 }
1922
1923 [[__gnu__::__always_inline__]]
1924 constexpr __simd_size_type
1925 _M_reduce_min_index() const
1926 {
1927 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1928 {
1929 const auto __bits = _M_to_uint();
1930 __glibcxx_simd_precondition(__bits, "An empty mask does not have a min_index.");
1931 if constexpr (_S_size == 1)
1932 return 0;
1933 else
1934 return __countr_zero(_M_to_uint());
1935 }
1936 else if (_M_data0._M_none_of())
1937 return _M_data1._M_reduce_min_index() + _N0;
1938 else
1939 return _M_data0._M_reduce_min_index();
1940 }
1941
1942 [[__gnu__::__always_inline__]]
1943 constexpr __simd_size_type
1944 _M_reduce_max_index() const
1945 {
1946 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1947 {
1948 const auto __bits = _M_to_uint();
1949 __glibcxx_simd_precondition(__bits, "An empty mask does not have a max_index.");
1950 if constexpr (_S_size == 1)
1951 return 0;
1952 else
1953 return __highest_bit(_M_to_uint());
1954 }
1955 else if (_M_data1._M_none_of())
1956 return _M_data0._M_reduce_max_index();
1957 else
1958 return _M_data1._M_reduce_max_index() + _N0;
1959 }
1960
1961 [[__gnu__::__always_inline__]]
1962 friend constexpr bool
1963 __is_const_known(const basic_mask& __x)
1964 { return __is_const_known(__x._M_data0) && __is_const_known(__x._M_data1); }
1965 };
1966} // namespace simd
1967_GLIBCXX_END_NAMESPACE_VERSION
1968} // namespace std
1969
1970#pragma GCC diagnostic pop
1971#endif // C++26
1972#endif // _GLIBCXX_SIMD_MASK_H
constexpr bool operator<=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
Definition chrono.h:859
constexpr bool operator>=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
Definition chrono.h:873
constexpr bool operator<(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
Definition chrono.h:826
constexpr bool operator>(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
Definition chrono.h:866
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
Definition complex:404
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
Definition complex:374
_Tp * end(valarray< _Tp > &__va) noexcept
Return an iterator pointing to one past the last element of the valarray.
Definition valarray:1251
_Tp * begin(valarray< _Tp > &__va) noexcept
Return an iterator pointing to the first element of the valarray.
Definition valarray:1229
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
ISO C++ entities toplevel namespace is std.
constexpr auto cend(const _Container &__cont) noexcept(noexcept(std::end(__cont))) -> decltype(std::end(__cont))
Return an iterator pointing to one past the last element of the const container.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
constexpr bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition bitset:1638
constexpr auto cbegin(const _Container &__cont) noexcept(noexcept(std::begin(__cont))) -> decltype(std::begin(__cont))
Return an iterator pointing to the first element of the const container.
constexpr bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition bitset:1628
constexpr bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition bitset:1618
static constexpr _Tp max() noexcept
Definition limits:328
static constexpr _Tp min() noexcept
Definition limits:324