25#ifndef _GLIBCXX_SIMD_MASK_H
26#define _GLIBCXX_SIMD_MASK_H 1
29#pragma GCC system_header
32#if __cplusplus >= 202400L
34#include "simd_iterator.h"
44#pragma GCC diagnostic push
45#pragma GCC diagnostic ignored "-Wpsabi"
47namespace std _GLIBCXX_VISIBILITY(default)
49_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Index-permutation functor: maps element index __i of a vector of __size
// elements, parameterized by a neighbor-block width _Np (used with
// _S_static_permute, cf. _SwapNeighbors<N>() uses later in this file).
// NOTE(review): the return statements of the branches below are elided in
// this view — the value produced per branch cannot be confirmed here.
52	  template <
unsigned _Np>
56	      operator()(
unsigned __i,
unsigned __size)
const
// Swapping blocks of _Np neighbors is only well-formed when the total size
// is a multiple of 2 * _Np.
58	        if (__size % (2 * _Np) != 0)
60	        else if (std::has_single_bit(_Np))
62	        else if (__i % (2 * _Np) >= _Np)
// Splits a bitset<_Mp> into a low part of _Np bits and a high part of
// _Mp - _Np bits.  Verified below by the two static_asserts.
// NOTE(review): several lines (the _Tmp struct definitions and the
// word-aligned branch body) are elided in this view.
69	  template <
size_t _Np,
size_t _Mp>
71	    __bitset_split(
const bitset<_Mp>& __b)
73	      constexpr auto __bits_per_word = __CHAR_BIT__ * __SIZEOF_LONG__;
// Word-aligned split: the two halves can be produced by a plain bit_cast
// into a pair-of-bitsets aggregate (no shifting needed).
74	      if constexpr (_Np % __bits_per_word == 0)
79	            bitset<_Mp - _Np> _M_hi;
81	          return __builtin_bit_cast(_Tmp, __b);
// Unaligned split: go through to_ullong(), so the whole input must fit in
// an unsigned long long.
85	          constexpr auto __bits_per_ullong = __CHAR_BIT__ * __SIZEOF_LONG_LONG__;
86	          static_assert(_Mp <= __bits_per_ullong);
87	          using _Lo = _Bitmask<_Np>;
88	          using _Hi = _Bitmask<_Mp - _Np>;
// The narrowing casts to _Lo/_Hi drop the bits that belong to the other half.
94	          return _Tmp {
static_cast<_Lo
>(__b.to_ullong()),
static_cast<_Hi
>(__b.to_ullong() >> _Np)};
// Compile-time sanity checks: bit 0 of a 128-bit set lands in the low half.
98	  static_assert(__bitset_split<64>(bitset<128>(1))._M_lo == bitset<64>(1));
99	  static_assert(__bitset_split<64>(bitset<128>(1))._M_hi == bitset<64>(0));
// rebind<_Tp, _V>: trait producing a vec/mask type "similar to" _V but with
// value type (or mask element size) _Tp, keeping _V's size and ABI family.
// NOTE(review): the primary template's declaration body is elided here.
103	  template <
typename _Tp,
typename _Vp, _ArchTraits _Traits = {}>
// Specialization for vec types: keep size and abi_type, swap the element type.
112	  template <__vectorizable _Tp, __simd_vec_type _Vp, _ArchTraits _Traits>
114	    struct rebind<_Tp, _Vp, _Traits>
115	    {
using type = __similar_vec<_Tp, _Vp::size(),
typename _Vp::abi_type>; };
// Specialization for mask types: same idea via __similar_mask.
120	  template <__vectorizable _Tp, __simd_mask_type _Mp, _ArchTraits _Traits>
122	    struct rebind<_Tp, _Mp, _Traits>
123	    {
using type = __similar_mask<_Tp, _Mp::size(),
typename _Mp::abi_type>; };
125	  template <
typename _Tp,
typename _Vp>
126	    using rebind_t =
typename rebind<_Tp, _Vp>::type;
// resize<_Np, _V>: trait producing a vec/mask like _V but with _Np elements.
129	  template <__simd_size_type _Np,
typename _Vp, _ArchTraits _Traits = {}>
133	  template <__simd_
size_type _Np, __simd_vec_type _Vp, _ArchTraits _Traits>
136	    struct resize<_Np, _Vp, _Traits>
137	    {
using type = __similar_vec<typename _Vp::value_type, _Np, typename _Vp::abi_type>; };
// Mask resize goes through __abi_rebind and checks the resulting ABI stays
// in the same variant (or degenerates to scalar) — presumably to keep the
// bitmask-vs-vector-mask representation stable across resizes; elided lines
// prevent full confirmation here.
139	  template <__simd_
size_type _Np, __simd_mask_type _Mp, _ArchTraits _Traits>
142	    struct resize<_Np, _Mp, _Traits>
144	      using _A1 =
decltype(__abi_rebind<__mask_element_size<_Mp>, _Np,
typename _Mp::abi_type,
147	      static_assert(__abi_tag<_A1>);
149	      static_assert(_Mp::abi_type::_S_variant == _A1::_S_variant || __scalar_abi_tag<_A1>
150	                      || __scalar_abi_tag<typename _Mp::abi_type>);
152	      using type = basic_mask<__mask_element_size<_Mp>, _A1>;
155	  template <__simd_
size_type _Np,
typename _Vp>
156	    using resize_t =
typename resize<_Np, _Vp>::type;
// Sentinel index value: elements mapped to uninit_element by a permutation
// are left uninitialized (defined relative to zero_element, declared elsewhere).
161	  inline constexpr __simd_size_type uninit_element = zero_element + 1;
// permute(v, idxmap): static permutation of v through a compile-time index
// mapping function; the result size defaults to v's size unless _Np is given.
164	  template<__simd_size_type _Np = 0, __simd_vec_or_mask_type _Vp,
165	           __index_permutation_function<_Vp> _IdxMap>
166	    [[__gnu__::__always_inline__]]
167	    constexpr resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>
168	    permute(const _Vp& __v, _IdxMap&& __idxmap)
169	    {
return resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>::_S_static_permute(__v, __idxmap); }
// permute(v, indices): dynamic (runtime) permutation via a simd of integral
// indices; result has as many elements as the index vector.
172	  template<__simd_vec_or_mask_type _Vp, __simd_
integral _Ip>
173	    [[__gnu__::__always_inline__]]
174	    constexpr resize_t<_Ip::size(), _Vp>
175	    permute(
const _Vp& __v,
const _Ip& __indices)
176	    {
return __v[__indices]; }
// chunk<_Vp>(x): splits a vec into pieces of type _Vp (array if the size
// divides evenly, tuple with a remainder piece otherwise — see _M_chunk).
179	  template<__simd_vec_type _Vp,
typename _Ap>
180	    [[__gnu__::__always_inline__]]
182	    chunk(
const basic_vec<typename _Vp::value_type, _Ap>& __x)
noexcept
183	    {
return __x.template _M_chunk<_Vp>(); }
// chunk<_Mp>(x): same for masks; the element byte size must match.
185	  template<__simd_mask_type _Mp,
typename _Ap>
186	    [[__gnu__::__always_inline__]]
188	    chunk(
const basic_mask<__mask_element_size<_Mp>, _Ap>& __x)
noexcept
189	    {
return __x.template _M_chunk<_Mp>(); }
// chunk<N>(x): convenience overloads taking a chunk *size* instead of a
// chunk *type*; forwarded to the type-based overloads via resize_t.
191	  template<__simd_
size_type _Np,
typename _Tp,
typename _Ap>
192	    [[__gnu__::__always_inline__]]
194	    chunk(
const basic_vec<_Tp, _Ap>& __x)
noexcept
195	    ->
decltype(chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x))
196	    {
return chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x); }
198	  template<__simd_
size_type _Np,
size_t _Bytes,
typename _Ap>
199	    [[__gnu__::__always_inline__]]
201	    chunk(
const basic_mask<_Bytes, _Ap>& __x)
noexcept
202	    ->
decltype(chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x))
203	    {
return chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x); }
// cat(x0, xs...): concatenates vecs of the same value type into one vec
// whose size is the sum of the input sizes; delegates to _S_concat.
206	  template<
typename _Tp,
typename _A0,
typename... _Abis>
207	    constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
208	    cat(
const basic_vec<_Tp, _A0>& __x0,
const basic_vec<_Tp, _Abis>&... __xs)
noexcept
210	      return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
211	               ::_S_concat(__x0, __xs...);
// Mask overload: same shape, keyed on the element byte size _Bytes.
215	  template<
size_t _Bytes,
typename _A0,
typename... _Abis>
216	    constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
217	    cat(
const basic_mask<_Bytes, _A0>& __x0,
const basic_mask<_Bytes, _Abis>&... __xs)
noexcept
219	      return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
220	               ::_S_concat(__x0, __xs...);
// Helpers for __extract_simd_at: given a list of pack sizes, count how many
// whole packs lie entirely before __offset (front) resp. entirely after the
// window [__offset, __offset + __max) (back), so they can be dropped from
// the recursion.  NOTE(review): loop bodies and early returns are elided in
// this view; the exact accumulation is not visible here.
225	  __packs_to_skip_at_front(
int __offset, initializer_list<int> __sizes)
229	    for (
int __s : __sizes)
240	  __packs_to_skip_at_back(
int __offset,
int __max, initializer_list<int> __sizes)
244	    for (
int __s : __sizes)
249	    return int(__sizes.size()) - __i;
// __extract_simd_at<_Dst>(offset, xs...): extracts a _Dst-sized window
// starting at element `offset` out of the logical concatenation of xs...
// The first two overloads are the trivial base cases (window exactly aligns
// with one argument); the third is the general recursive dispatcher.
// NOTE(review): many interior lines (returns, braces) are elided in this
// view — the branch structure is readable but not fully verifiable here.
256	  template <
typename _Dst>
257	    [[__gnu__::__always_inline__]]
259	    __extract_simd_at(
auto _Offset,
const _Dst& __r,
const auto&...)
260	    requires(_Offset.value == 0)
263	  template <
typename _Dst,
typename _V0>
264	    [[__gnu__::__always_inline__]]
266	    __extract_simd_at(
auto _Offset,
const _V0&,
const _Dst& __r,
const auto&...)
267	    requires(_Offset.value == _V0::size.value)
270	  template <
typename _Dst,
typename... _Vs>
271	    [[__gnu__::__always_inline__]]
273	    __extract_simd_at(
auto _Offset,
const _Vs&... __xs)
275	      using _Adst =
typename _Dst::abi_type;
// Multi-register destination: build low and high halves independently.
276	      if constexpr (_Adst::_S_nreg >= 2)
278	          using _Dst0 = remove_cvref_t<decltype(declval<_Dst>()._M_get_low())>;
279	          using _Dst1 = remove_cvref_t<decltype(declval<_Dst>()._M_get_high())>;
280	          return _Dst::_S_init(__extract_simd_at<_Dst0>(_Offset, __xs...),
281	                               __extract_simd_at<_Dst1>(_Offset + _Dst0::size, __xs...));
285	      using _Ret = remove_cvref_t<decltype(declval<_Dst>()._M_get())>;
286	      constexpr bool __use_bitmask = __simd_mask_type<_Dst> && _Adst::_S_is_bitmask;
287	      constexpr int __dst_full_size = __bit_ceil(
unsigned(_Adst::_S_size));
288	      constexpr int __nargs =
sizeof...(__xs);
// C++26 pack indexing (_Vs...[I]) is used throughout this function.
289	      using _Afirst =
typename _Vs...[0]::abi_type;
290	      using _Alast =
typename _Vs...[__nargs - 1]::abi_type;
291	      const auto& __x0 = __xs...[0];
292	      const auto& __xlast = __xs...[__nargs - 1];
293	      constexpr int __ninputs = (_Vs::size.value + ...);
// Drop arguments that lie entirely outside the requested window, then recurse
// with an adjusted offset.
294	      if constexpr (_Offset.value >= _Afirst::_S_size
295	                      || __ninputs - _Offset.value - _Alast::_S_size >= _Adst::_S_size)
297	          constexpr int __skip_front = __packs_to_skip_at_front(_Offset.value,
298	                                                                {_Vs::size.value...});
299	          constexpr int __skip_back = __packs_to_skip_at_back(_Offset.value, _Adst::_S_size,
300	                                                              {_Vs::size.value...});
301	          static_assert(__skip_front > 0 || __skip_back > 0);
302	          constexpr auto [...__skip] = _IotaArray<__skip_front>;
303	          constexpr auto [...__is] = _IotaArray<__nargs - __skip_front - __skip_back>;
304	          constexpr int __new_offset = _Offset.value - (0 + ... + _Vs...[__skip]
::size.value);
305	          return __extract_simd_at<_Dst>(cw<__new_offset>, __xs...[__is + __skip_front]...);
// Single-element destination: plain subscript.
307	      else if constexpr (_Adst::_S_size == 1)
309	        return _Dst(__x0[_Offset.value]);
// Flatten multi-register first/last arguments into their halves and recurse.
311	      else if constexpr (_Afirst::_S_nreg >= 2 || _Alast::_S_nreg >= 2)
313	          constexpr bool __flatten_first = _Afirst::_S_nreg >= 2;
314	          constexpr bool __flatten_last = __nargs > 1 && _Alast::_S_nreg >= 2;
315	          constexpr auto [...__is] = _IotaArray<__nargs - __flatten_first - __flatten_last>;
316	          if constexpr (__flatten_first && __flatten_last)
317	            return __extract_simd_at<_Dst>(
318	                     _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...,
319	                     __xlast._M_get_low(), __xlast._M_get_high());
320	          else if constexpr (__flatten_first)
321	            return __extract_simd_at<_Dst>(
322	                     _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...);
324	            return __extract_simd_at<_Dst>(
325	                     _Offset, __xs...[__is]..., __xlast._M_get_low(), __xlast._M_get_high());
// Mixed mask representations: convert inputs to the destination's variant first.
327	      else if constexpr (__simd_mask_type<_Dst>
328	                           && ((_Adst::_S_variant != _Vs::abi_type::_S_variant
329	                                  && !__scalar_abi_tag<typename _Vs::abi_type>) || ...))
331	          return __extract_simd_at<_Dst>(
332	                   _Offset,
static_cast<const resize_t<_Vs::size.value, _Dst
>&>(__xs)...);
// Single source argument: shift (bitmask) or vector-extract.
337	      else if constexpr (__nargs == 1)
339	          if constexpr (__use_bitmask)
340	            return _Dst(_Ret(__x0._M_to_uint() >> _Offset.value));
342	            return _VecOps<_Ret>::_S_extract(__x0._M_concat_data(
false), _Offset);
// Bitmask destination, multiple sources: accumulate shifted uint chunks.
// (template for = C++26 expansion statement.)
344	      else if constexpr (__use_bitmask)
346	          static_assert(_Afirst::_S_nreg == 1);
347	          static_assert(_Offset.value < _Afirst::_S_size);
348	          int __offset = -_Offset.value;
350	          template for (
const auto& __x : {__xs...})
353	                __r = _Ret(__x._M_to_uint() >> -__offset);
354	              else if (__offset < _Adst::_S_size)
355	                __r |= _Ret(_Ret(__x._M_to_uint()) << __offset);
356	              __offset += __x.size.value;
// Two power-of-two sources at offset 0: direct vector concat.
360	      else if constexpr (__nargs == 2 && _Offset == 0 && _Adst::_S_nreg == 1
361	                           && _Afirst::_S_size >= _Alast::_S_size
362	                           && __has_single_bit(
unsigned(_Afirst::_S_size)))
364	          if constexpr (_Afirst::_S_size == 1)
366	            return _Ret{__x0._M_concat_data()[0], __xlast._M_concat_data()[0]};
369	              const auto __v0 = __x0._M_concat_data();
370	              const auto __v1 = __vec_zero_pad_to<sizeof(__v0)>(__xlast._M_concat_data());
371	              return __vec_concat(__v0, __v1);
// Append a 1-element tail by a single element insert.
374	      else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
375	                           && _Afirst::_S_nreg == 1 && _Alast::_S_size == 1)
377	          _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_get());
378	          __vec_set(__r, _Afirst::_S_size, __xlast._M_concat_data()[0]);
// Append a 2-element tail; where possible insert both lanes as one wider
// element (bit-punned) instead of two scalar inserts.
381	      else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
382	                           && _Afirst::_S_nreg == 1 && _Alast::_S_size == 2)
384	          _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_concat_data());
385	          const auto __x1 = __xlast._M_concat_data();
386	          if constexpr (
sizeof(__x1) <=
sizeof(double) && (_Afirst::_S_size & 1) == 0)
388	              using _Up = __conditional_t<
389	                            is_floating_point_v<__vec_value_type<_Ret>>,
390	                            __conditional_t<
sizeof(__x1) ==
sizeof(
double), double,
float>,
391	                            __integer_from<
sizeof(__x1)>>;
392	              auto __r2 = __vec_bit_cast<_Up>(__r);
393	              __vec_set(__r2, _Afirst::_S_size / 2, __vec_bit_cast<_Up>(__x1)[0]);
394	              __r =
reinterpret_cast<_Ret
>(__r2);
398	              __vec_set(__r, _Afirst::_S_size, __x1[0]);
399	              __vec_set(__r, _Afirst::_S_size + 1, __x1[1]);
// General two-source case: one shufflevector over both inputs.
403	      else if constexpr (__nargs == 2 && _Afirst::_S_nreg == 1 && _Alast::_S_nreg == 1)
405	          constexpr auto [...__is] = _IotaArray<__dst_full_size>;
406	          constexpr int __v2_offset = __width_of<
decltype(__x0._M_concat_data())>;
407	          return __builtin_shufflevector(
408	                   __x0._M_concat_data(), __xlast._M_concat_data(), [](
int __i)
consteval {
409	                     if (__i < _Afirst::_S_size)
411	                     __i -= _Afirst::_S_size;
412	                     if (__i < _Alast::_S_size)
413	                       return __i + __v2_offset;
416	                   }(__is + _Offset.value)...);
418	      else if (__is_const_known(__xs...) || __ninputs == _Adst::_S_size)
420	          return _VecOps<_Ret>::_S_extract(
421	                   __vec_concat_sized<__xs.size.value...>(__xs._M_concat_data(
false)...),
// Fallback: spill everything to a stack buffer and memcpy the window out.
// Masks are stored as -1/0 via unary minus before the store.
426	          alignas(_Ret) __vec_value_type<_Ret>
427	            __tmp[
std::max(__ninputs, _Offset.value + __dst_full_size)] = {};
429	          template for (
const auto& __x : {__xs...})
431	              if constexpr (__simd_mask_type<_Dst>)
432	                (-__x)._M_store(__tmp + __offset);
434	                __x._M_store(__tmp + __offset);
435	              __offset += __x.size.value;
438	          __builtin_memcpy(&__r, __tmp + _Offset.value,
sizeof(_Ret));
// Catch-all basic_mask specialization for invalid _Bytes/_Ap combinations:
// every special member is deleted with a diagnostic message (C++26
// "= delete(reason)") so misuse produces a readable error instead of a
// template-instantiation avalanche.
445	  template <
size_t _Bytes,
typename _Ap>
449	      using value_type = bool;
451	      using abi_type = _Ap;
453#define _GLIBCXX_DELETE_SIMD "This specialization is disabled because of an invalid combination " \
454	                             "of template arguments to basic_mask."
456	      basic_mask() =
delete(_GLIBCXX_DELETE_SIMD);
458	      ~basic_mask() =
delete(_GLIBCXX_DELETE_SIMD);
460	      basic_mask(
const basic_mask&) =
delete(_GLIBCXX_DELETE_SIMD);
462	      basic_mask& operator=(
const basic_mask&) =
delete(_GLIBCXX_DELETE_SIMD);
464#undef _GLIBCXX_DELETE_SIMD
// _MaskBase<_Bytes, _Ap>: common base of all basic_mask specializations.
// Provides iterators, size, and deleted cross-size / vec<->mask conversions
// with explanatory messages.  NOTE(review): some member bodies (begin/end
// variants) are elided in this view.
467	  template <
size_t _Bytes,
typename _Ap>
470	      using _Mp = basic_mask<_Bytes, _Ap>;
473	      using _VecType = __simd_vec_from_mask_t<_Bytes, _Ap>;
475	      static_assert(destructible<_VecType> || _Bytes >
sizeof(0ull));
478	      using iterator = __iterator<_Mp>;
480	      using const_iterator = __iterator<const _Mp>;
// Iterators hold a reference to the derived mask plus an index.
484	      {
return {
static_cast<_Mp&
>(*this), 0}; }
486	      constexpr const_iterator
487	      begin() const noexcept
490	      constexpr const_iterator
492	      {
return {
static_cast<const _Mp&
>(*this), 0}; }
494	      constexpr default_sentinel_t
498	      constexpr default_sentinel_t
499	      cend() const noexcept
502	      static constexpr auto size = __simd_size_c<_Ap::_S_size>;
504	      _MaskBase() =
default;
// Size-mismatched mask conversion is ill-formed with a clear diagnostic.
507	      template <
size_t _UBytes,
typename _UAbi>
508	        requires (_Ap::_S_size != _UAbi::_S_size)
510	        _MaskBase(
const basic_mask<_UBytes, _UAbi>&) =
delete(
"size mismatch");
// A vec never converts implicitly to a mask.
512	      template <
typename _Up,
typename _UAbi>
514	        _MaskBase(
const basic_vec<_Up, _UAbi>&)
515	        =
delete(
"use operator! or a comparison to convert a vec into a mask");
517	      template <
typename _Up,
typename _UAbi>
518	        requires (_Ap::_S_size != _UAbi::_S_size)
519	        operator basic_vec<_Up, _UAbi>()
const
520	        =
delete(
"size mismatch");
// basic_mask specialization for single-register ABIs (_S_nreg == 1).
// _DataType is either bool (scalar), an unsigned integer (bitmask), or a
// vector builtin of signed integers holding -1/0 per element.
// NOTE(review): several member bodies in this span are elided in this view.
523	  template <
size_t _Bytes, __abi_tag _Ap>
524	    requires (_Ap::_S_nreg == 1)
525	    class basic_mask<_Bytes, _Ap>
526	    : public _MaskBase<_Bytes, _Ap>
528	      using _Base = _MaskBase<_Bytes, _Ap>;
530	      using _VecType = _Base::_VecType;
532	      template <
size_t,
typename>
533	        friend class basic_mask;
535	      template <
typename,
typename>
536	        friend class basic_vec;
538	      static constexpr int _S_size = _Ap::_S_size;
540	      using _DataType =
typename _Ap::template _MaskDataType<_Bytes>;
542	      static constexpr bool _S_has_bool_member = is_same_v<_DataType, bool>;
544	      static constexpr bool _S_is_scalar = _S_has_bool_member;
546	      static constexpr bool _S_use_bitmask = _Ap::_S_is_bitmask;
// Full (allocated) element count: scalar and sub-byte bitmask cases are
// special; otherwise round up to a power of two.
548	      static constexpr int _S_full_size = [] {
549	        if constexpr (_S_is_scalar)
551	        else if constexpr (_S_use_bitmask && _S_size < __CHAR_BIT__)
554	          return __bit_ceil(
unsigned(_S_size));
557	      static constexpr bool _S_is_partial = _S_size != _S_full_size;
// Mask of the valid elements within _DataType (all-ones when not partial).
559	      static constexpr _DataType _S_implicit_mask = [] {
560	        if constexpr (_S_is_scalar)
562	        else if (!_S_is_partial)
563	          return _DataType(~_DataType());
564	        else if constexpr (_S_use_bitmask)
565	          return _DataType((_DataType(1) << _S_size) - 1);
568	            constexpr auto [...__is] = _IotaArray<_S_full_size>;
569	            return _DataType{ (__is < _S_size ? -1 : 0)... };
575	      static constexpr size_t _S_padding_bytes = 0;
580	      using value_type = bool;
582	      using abi_type = _Ap;
584	      using iterator = _Base::iterator;
586	      using const_iterator = _Base::const_iterator;
// Factory from raw representation / from an unsigned bit pattern.
589	      [[__gnu__::__always_inline__]]
590	      static constexpr basic_mask
591	      _S_init(_DataType __x)
598	      [[__gnu__::__always_inline__]]
599	      static constexpr basic_mask
600	      _S_init(unsigned_integral
auto __bits)
601	      {
return basic_mask(__bits); }
603	      [[__gnu__::__always_inline__]]
604	      constexpr const _DataType&
614	      template <
size_t _UBytes,
typename _UAbi>
615	        [[__gnu__::__always_inline__]]
616	        static constexpr basic_mask
617	        _S_recursive_bit_cast(
const basic_mask<_UBytes, _UAbi>& __x)
618	        {
return __builtin_bit_cast(basic_mask, __x._M_concat_data()); }
// Returns the raw data; a scalar mask widens to a 1-element vector builtin,
// and partial masks are sanitized (padding elements cleared) on request.
620	      [[__gnu__::__always_inline__]]
622	      _M_concat_data(
bool __do_sanitize = _S_is_partial)
const
624	        if constexpr (_S_is_scalar)
625	          return __vec_builtin_type<__integer_from<_Bytes>, 1>{__integer_from<_Bytes>(-_M_data)};
628	            if constexpr (_S_is_partial)
630	                return _DataType(_M_data & _S_implicit_mask);
// _S_partial_mask_of_n(n): mask with the first n elements set.  Vector-mask
// ABIs compare an iota against n; bitmask ABIs prefer x86 BZHI when BMI2 is
// available, else a shift.  NOTE(review): some lines (preconditions, #endif
// lines) are elided in this view.
640	      template <_ArchTraits _Traits = {}>
641	        [[__gnu__::__always_inline__]]
642	        static constexpr basic_mask
643	        _S_partial_mask_of_n(
int __n)
645	          static_assert(!_S_is_scalar);
646	          if constexpr (!_S_use_bitmask)
648	              using _Ip = __integer_from<_Bytes>;
650	                "_S_partial_mask_of_n without _S_use_bitmask requires "
651	                "positive __n that does not overflow.");
652	              constexpr _DataType __0123
653	                = __builtin_bit_cast(_DataType, _IotaArray<_Ip(_S_full_size)>);
654	              return basic_mask(__0123 < _Ip(__n));
658	              __glibcxx_simd_precondition(__n >= 0 && __n <= 255,
659	                                          "The x86 BZHI instruction requires __n to "
660	                                          "only use bits 0:7");
661#if __has_builtin(__builtin_ia32_bzhi_si)
662	              if constexpr (_S_size <= 32 && _Traits._M_have_bmi2())
663	                return _S_init(_Bitmask<_S_size>(
664	                                 __builtin_ia32_bzhi_si(~0u >> (32 - _S_size),
unsigned(__n))));
666#if __has_builtin(__builtin_ia32_bzhi_di)
667	              else if constexpr (_S_size <= 64 && _Traits._M_have_bmi2())
668	                return _S_init(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size),
unsigned(__n)));
670	              if constexpr (_S_size <= 32)
672	                  __glibcxx_simd_precondition(__n < 32,
"invalid shift");
673	                  return _S_init(_Bitmask<_S_size>((1u <<
unsigned(__n)) - 1));
675	              else if constexpr (_S_size <= 64)
677	                  __glibcxx_simd_precondition(__n < 64,
"invalid shift");
678	                  return _S_init((1ull <<
unsigned(__n)) - 1);
681	                static_assert(
false);
// In-place AND / OR of each element with its neighbor (pairwise); the
// 0x5555... constant selects even/odd bit positions in the bitmask case.
685	      [[__gnu__::__always_inline__]]
686	      constexpr basic_mask&
689	        if constexpr (_S_use_bitmask)
690	          _M_data &= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
691	                       | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
693	          _M_data &= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
697	      [[__gnu__::__always_inline__]]
698	      constexpr basic_mask&
701	        if constexpr (_S_use_bitmask)
702	          _M_data |= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
703	                       | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
705	          _M_data |= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
// _M_chunk<_Mp>(): split into array<_Mp, n> (exact division) or a tuple
// with a trailing remainder mask; implemented via __extract_simd_at.
709	      template <
typename _Mp>
710	        [[__gnu__::__always_inline__]]
711	        constexpr auto _M_chunk() const noexcept
713	          constexpr int __n = _S_size / _Mp::_S_size;
714	          constexpr int __rem = _S_size % _Mp::_S_size;
715	          constexpr auto [...__is] = _IotaArray<__n>;
716	          if constexpr (__rem == 0)
717	            return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *
this)...};
720	              using _Rest = resize_t<__rem, _Mp>;
721	              return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *
this)...,
722	                           __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, *
this));
// _S_concat: identity for one argument; otherwise extract-at-0 over all.
726	      [[__gnu__::__always_inline__]]
727	      static constexpr const basic_mask&
728	      _S_concat(
const basic_mask& __x0)
noexcept
731	      template <
typename... _As>
732	        requires (
sizeof...(_As) > 1)
733	        [[__gnu__::__always_inline__]]
734	        static constexpr basic_mask
735	        _S_concat(
const basic_mask<_Bytes, _As>&... __xs)
noexcept
737	          static_assert(_S_size == (_As::_S_size + ...));
738	          return __extract_simd_at<basic_mask>(cw<0>, __xs...);
// Constructors.  NOTE(review): initializer bodies are partially elided in
// this view; comments describe only what the visible lines establish.
742	      basic_mask() =
default;
// Implicit round-trip with the raw vector-mask representation (not
// available for scalar or bitmask storage).
745	      [[__gnu__::__always_inline__]]
747	      basic_mask(_DataType __x)
requires(!_S_is_scalar && !_S_use_bitmask)
751	      [[__gnu__::__always_inline__]]
753	      operator _DataType()
requires(!_S_is_scalar && !_S_use_bitmask)
// Broadcast a single bool to all elements.
757	      [[__gnu__::__always_inline__]]
759	      basic_mask(same_as<bool>
auto __x)
noexcept
760	      : _M_data(__x ? _S_implicit_mask : _DataType())
// Converting constructor from a same-size mask with different element bytes
// and/or ABI; explicit-ness decided by __is_mask_conversion_explicit.
764	      template <
size_t _UBytes,
typename _UAbi>
765	        requires (_S_size == _UAbi::_S_size)
766	        [[__gnu__::__always_inline__]]
767	        constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
768	        basic_mask(
const basic_mask<_UBytes, _UAbi>& __x) noexcept
769	        : _M_data([&] [[__gnu__::__always_inline__]] {
770	            using _UV = basic_mask<_UBytes, _UAbi>;
772	            if constexpr (_S_is_scalar)
// Scalar source: rebuild element-wise (bitmask by shifting bits into place,
// vector mask from -bool values).
776	            else if constexpr (_UV::_S_is_scalar)
778	                constexpr auto [...__is] = _IotaArray<_S_size>;
779	                if constexpr (_S_use_bitmask)
780	                  return ((_DataType(__x[__is]) << __is) | ...);
782	                  return _DataType{__vec_value_type<_DataType>(-__x[__is])...};
// Bitmask involved on either side: round-trip through a bitset.
786	            else if constexpr (_S_use_bitmask || _UV::_S_use_bitmask)
787	              return basic_mask(__x.to_bitset())._M_data;
790	            else if constexpr (_Bytes == _UBytes)
791	              return _S_recursive_bit_cast(__x)._M_data;
// 2-byte -> 1-byte narrowing uses an x86-specific pack at runtime;
// presumably the generic __vec_mask_cast handles everything else.
797	                if constexpr (_Bytes == 1 && _UBytes == 2)
798	                  if (!__is_const_known(__x))
800	                      if constexpr (_UAbi::_S_nreg == 1)
801	                        return __x86_cvt_vecmask<_DataType>(__x._M_data);
802	                      else if constexpr (_UAbi::_S_nreg == 2)
804	                          auto __lo = __x._M_data0._M_data;
805	                          auto __hi = __vec_zero_pad_to<sizeof(__lo)>(
806	                                        __x._M_data1._M_concat_data());
807	                          return __x86_cvt_vecmask<_DataType>(__lo, __hi);
811	                return __vec_mask_cast<_DataType>(__x._M_concat_data());
816	      using _Base::_MaskBase;
// Generator constructor: __gen(simd_size_c<i>) yields element i.
819	      template <__simd_generator_invokable<
bool, _S_size> _Fp>
820	        [[__gnu__::__always_inline__]]
822	        basic_mask(_Fp&& __gen)
823	        : _M_data([&] [[__gnu__::__always_inline__]] {
824	            constexpr auto [...__is] = _IotaArray<_S_size>;
825	            if constexpr (_S_is_scalar)
826	              return __gen(__simd_size_c<0>);
827	            else if constexpr (_S_use_bitmask)
828	              return _DataType(((_DataType(__gen(__simd_size_c<__is>)) << __is)
831	              return _DataType{__vec_value_type<_DataType>(
832	                                 __gen(__simd_size_c<__is>) ? -1 : 0)...};
// Construction from bitset<_S_size> goes through the unsigned-integral ctor.
837	      [[__gnu__::__always_inline__]]
839	      basic_mask(
const same_as<bitset<_S_size>>
auto& __b)
noexcept
840	      : basic_mask(
static_cast<_Bitmask<_S_size>
>(__b.to_ullong()))
843	        static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
// Construction from an unsigned integer bit pattern (bit i -> element i).
// The non-const-known vector-mask path expands bits via broadcast + power-
// of-two AND; wide byte masks first replicate the bytes of the pattern.
847	      template <
unsigned_
integral _Tp>
848	        requires (!same_as<_Tp, bool>)
849	        [[__gnu__::__always_inline__]]
851	        basic_mask(_Tp __val) noexcept
852	        : _M_data([&] [[__gnu__::__always_inline__]] () {
853	            if constexpr (_S_use_bitmask)
855	            else if constexpr (_S_is_scalar)
856	              return bool(__val & 1);
857	            else if (__is_const_known(__val))
859	                constexpr auto [...__is] = _IotaArray<_S_size>;
860	                return _DataType {__vec_value_type<_DataType>((__val & (1ull << __is)) == 0
865	                using _Ip =
typename _VecType::value_type;
866	                _VecType __v0 = _Ip(__val);
867	                constexpr int __bits_per_element =
sizeof(_Ip) * __CHAR_BIT__;
868	                constexpr _VecType __pow2 = _VecType(1) << (__iota<_VecType> % __bits_per_element);
869	                if constexpr (_S_size < __bits_per_element)
870	                  return ((__v0 & __pow2) > 0)._M_concat_data();
871	                else if constexpr (_S_size == __bits_per_element)
872	                  return ((__v0 & __pow2) != 0)._M_concat_data();
875	                    static_assert(_Bytes == 1);
876	                    static_assert(
sizeof(_Ip) == 1);
877	                    _Bitmask<_S_size> __bits = __val;
878	                    static_assert(
sizeof(_VecType) %
sizeof(__bits) == 0);
879	                    if constexpr (
sizeof(_DataType) == 32)
881	                        __vec_builtin_type<_UInt<8>, 4> __v1 = {
882	                          0xffu & (__bits >> (0 * __CHAR_BIT__)),
883	                          0xffu & (__bits >> (1 * __CHAR_BIT__)),
884	                          0xffu & (__bits >> (2 * __CHAR_BIT__)),
885	                          0xffu & (__bits >> (3 * __CHAR_BIT__)),
// Multiplying by 0x0101...01 replicates each byte across its 8 lanes.
887	                        __v1 *= 0x0101'0101'0101'0101ull;
888	                        __v0 = __builtin_bit_cast(_VecType, __v1);
889	                        return ((__v0 & __pow2) != 0)._M_data;
893	                        using _V1 = vec<_Ip,
sizeof(__bits)>;
894	                        _V1 __v1 = __builtin_bit_cast(_V1, __bits);
895	                        __v0 = _VecType::_S_static_permute(__v1, [](
int __i) {
896	                          return __i / __CHAR_BIT__;
898	                        return ((__v0 & __pow2) != 0)._M_data;
// Element access and unary operators.  NOTE(review): some return statements
// are elided in this view.
912	      [[__gnu__::__always_inline__]]
914	      operator[](__simd_size_type __i)
const
916	        __glibcxx_simd_precondition(__i >= 0 && __i < _S_size,
"subscript is out of bounds");
917	        if constexpr (_S_is_scalar)
919	        else if constexpr (_S_use_bitmask)
920	          return bool((_M_data >> __i) & 1);
922	          return _M_data[__i] & 1;
// Logical NOT: flips every element (scalar bool via !, otherwise bitwise ~).
926	      [[__gnu__::__always_inline__]]
928	      operator!() const noexcept
930	        if constexpr (_S_is_scalar)
931	          return _S_init(!_M_data);
933	          return _S_init(~_M_data);
// Unary +: mask converted to the corresponding vec (true -> 1, false -> 0).
936	      [[__gnu__::__always_inline__]]
938	      operator+() const noexcept requires destructible<_VecType>
939	      {
return operator _VecType(); }
// Unary -: true -> -1, false -> 0; vector masks are already -1/0 so a
// bit_cast suffices there.
944	      [[__gnu__::__always_inline__]]
946	      operator-() const noexcept requires destructible<_VecType>
948	        using _Ip =
typename _VecType::value_type;
949	        if constexpr (_S_is_scalar)
950	          return _Ip(-
int(_M_data));
951	        else if constexpr (_S_use_bitmask)
952	          return __select_impl(*
this, _Ip(-1), _Ip());
955	            static_assert(
sizeof(_VecType) ==
sizeof(_M_data));
956	            return __builtin_bit_cast(_VecType, _M_data);
// Unary ~: true -> -2, false -> -1 (i.e. ~(-x)); computed from the -1/0
// representation by subtracting 1.
963	      [[__gnu__::__always_inline__]]
965	      operator~() const noexcept requires destructible<_VecType>
967	        using _Ip =
typename _VecType::value_type;
968	        if constexpr (_S_is_scalar)
969	          return _Ip(~
int(_M_data));
970	        else if constexpr (_S_use_bitmask)
971	          return __select_impl(*
this, _Ip(-2), _Ip(-1));
974	            static_assert(
sizeof(_VecType) ==
sizeof(_M_data));
975	            return __builtin_bit_cast(_VecType, _M_data) - _Ip(1);
980	      operator~() const noexcept = delete;
// Conversion to a same-size vec of any element type (explicit when the
// element byte size differs); select 1/0 through the vec's own mask type.
983	      template <typename _Up, typename _UAbi>
984	        requires (_UAbi::_S_size == _S_size)
985	        [[__gnu__::__always_inline__]]
986	        constexpr explicit(sizeof(_Up) != _Bytes)
987	        operator basic_vec<_Up, _UAbi>() const noexcept
989	          if constexpr (_S_is_scalar)
993	              using _UV = basic_vec<_Up, _UAbi>;
994	              return __select_impl(
static_cast<_UV::mask_type
>(*
this), _UV(1), _UV(0));
998	      using _Base::operator basic_vec;
1001	      [[__gnu__::__always_inline__]]
1002	      constexpr bitset<_S_size>
1003	      to_bitset() const noexcept
1006	        static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
// _M_to_uint<_Offset>(): mask bits as an unsigned integer, shifted left by
// _Offset.  Bitmask/scalar storage just masks and shifts; x86 vector masks
// use movmsk (with an even-bit extract for 2-byte elements); the generic
// path reduces a per-element power-of-two select, byte-compacting first for
// sizes past the integer width.
1017	      template <
int _Offset = 0, _ArchTraits _Traits = {}>
1018	        [[__gnu__::__always_inline__]]
1019	        constexpr _Bitmask<_S_size + _Offset>
1022	          constexpr int __nbits = _S_size;
1023	          static_assert(__nbits + _Offset <= numeric_limits<unsigned long long>::digits);
1025	          using _U0 = _Bitmask<__nbits>;
1027	          using _Ur = _Bitmask<__nbits + _Offset>;
1028	          if constexpr (_S_is_scalar || _S_use_bitmask)
1030	              auto __bits = _M_data;
1031	              if constexpr (_S_is_partial)
1032	                __bits &= _S_implicit_mask;
1033	              return _Ur(__bits) << _Offset;
1038	              if (!__is_const_known(*
this))
1041	                  if constexpr (_Bytes != 2)
1042	                    __uint = _U0(__x86_movmsk(_M_data));
1043	                  else if constexpr (_Bytes == 2 && _Traits._M_have_bmi2())
1044	                    __uint = __bit_extract_even<__nbits>(__x86_movmsk(_M_data));
1045	                  else if constexpr (_Bytes == 2)
1046	                    return __similar_mask<char, __nbits, _Ap>(*this).template _M_to_uint<_Offset>();
1048	                    static_assert(
false);
1053	                  if constexpr (_S_is_partial)
1054	                    __uint &= (_U0(1) << _S_size) - 1;
1055	                  return _Ur(__uint) << _Offset;
1058	          using _IV = _VecType;
1059	          static_assert(destructible<_IV>);
1060	          const typename _IV::mask_type& __k = [&] [[__gnu__::__always_inline__]] () {
1061	            if constexpr (is_same_v<typename _IV::mask_type, basic_mask>)
1064	              return typename _IV::mask_type(*
this);
1066	          constexpr int __n = _IV::size();
// All mask bits fit into one element: OR-reduce selected powers of two.
1067	          if constexpr (_Bytes * __CHAR_BIT__ >= __n)
1069	              constexpr _IV __pow2 = _IV(1) << __iota<_IV>;
1070	              return _Ur(_U0(__select_impl(__k, __pow2, _IV())
1071	                               ._M_reduce(bit_or<>()))) << _Offset;
// Non-multiple-of-8 sizes: split into a byte-aligned low part and the rest.
1073	          else if constexpr (__n % __CHAR_BIT__ != 0)
1075	              constexpr int __n_lo = __n - __n % __CHAR_BIT__;
1076	              const auto [__lo, __hi] = chunk<__n_lo>(__k);
1077	              _Ur __bits = __hi.template _M_to_uint<_Offset + __n_lo>();
1078	              return __bits | __lo.template _M_to_uint<_Offset>();
// General case: per-byte bit positions, log-time OR across neighbors, then
// compact every 8th element and bit_cast the front into an integer.
1082	              constexpr _IV __pow2 = _IV(1) << (__iota<_IV> % _IV(__CHAR_BIT__));
1083	              _IV __x = __select_impl(__k, __pow2, _IV());
1085	              __x |= _IV::_S_static_permute(__x, _SwapNeighbors<4>());
1086	              __x |= _IV::_S_static_permute(__x, _SwapNeighbors<2>());
1087	              __x |= _IV::_S_static_permute(__x, _SwapNeighbors<1>());
1089	              __x = _IV::_S_static_permute(__x, [](
int __i) {
1090	                return __i * 8 < __n ? __i * 8 : uninit_element;
1093	              _U0 __bits = __builtin_bit_cast(
1094	                             __similar_vec<_U0, __n * _Bytes /
sizeof(_U0), _Ap>, __x)[0];
1096	              if constexpr (!__has_single_bit(
unsigned(__nbits)))
1097	                __bits &= (_U0(1) << __nbits) - 1;
1098	              return _Ur(__bits) << _Offset;
1103	      [[__gnu__::__always_inline__]]
1104	      constexpr unsigned long long
1106	      {
return _M_to_uint(); }
// Logical/bitwise operators.  && and || are element-wise (no short circuit)
// and identical to & and | on the raw representation.
1109	      [[__gnu__::__always_inline__]]
1110	      friend constexpr basic_mask
1111	      operator&&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1112	      {
return _S_init(__x._M_data & __y._M_data); }
1114	      [[__gnu__::__always_inline__]]
1115	      friend constexpr basic_mask
1116	      operator||(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1117	      {
return _S_init(__x._M_data | __y._M_data); }
1119	      [[__gnu__::__always_inline__]]
1120	      friend constexpr basic_mask
1121	      operator&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1122	      {
return _S_init(__x._M_data & __y._M_data); }
1124	      [[__gnu__::__always_inline__]]
1125	      friend constexpr basic_mask
1126	      operator|(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1127	      {
return _S_init(__x._M_data | __y._M_data); }
1129	      [[__gnu__::__always_inline__]]
1130	      friend constexpr basic_mask
1131	      operator^(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1132	      {
return _S_init(__x._M_data ^ __y._M_data); }
1135	      [[__gnu__::__always_inline__]]
1136	      friend constexpr basic_mask&
1137	      operator&=(basic_mask& __x,
const basic_mask& __y)
noexcept
1139	        __x._M_data &= __y._M_data;
1143	      [[__gnu__::__always_inline__]]
1144	      friend constexpr basic_mask&
1145	      operator|=(basic_mask& __x,
const basic_mask& __y)
noexcept
1147	        __x._M_data |= __y._M_data;
1151	      [[__gnu__::__always_inline__]]
1152	      friend constexpr basic_mask&
1153	      operator^=(basic_mask& __x,
const basic_mask& __y)
noexcept
1155	        __x._M_data ^= __y._M_data;
// Element-wise comparisons on bools, expressed via XOR and logic identities
// (e.g. x >= y on bools is x || !y).
1160	      [[__gnu__::__always_inline__]]
1161	      friend constexpr basic_mask
1162	      operator==(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1163	      {
return !(__x ^ __y); }
1165	      [[__gnu__::__always_inline__]]
1166	      friend constexpr basic_mask
1167	      operator!=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1168	      {
return __x ^ __y; }
1170	      [[__gnu__::__always_inline__]]
1171	      friend constexpr basic_mask
1172	      operator>=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1173	      {
return __x || !__y; }
1175	      [[__gnu__::__always_inline__]]
1176	      friend constexpr basic_mask
1177	      operator<=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1178	      {
return !__x || __y; }
1180	      [[__gnu__::__always_inline__]]
1181	      friend constexpr basic_mask
1182	      operator>(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1183	      {
return __x && !__y; }
1185	      [[__gnu__::__always_inline__]]
1186	      friend constexpr basic_mask
1187	      operator<(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1188	      {
return !__x && __y; }
// Blend of two masks under a selector mask; bitmask storage uses the
// classic (k & t) | (~k & f) formula.  NOTE(review): some branch lines are
// elided in this view.
1191	      [[__gnu__::__always_inline__]]
1192	      friend constexpr basic_mask
1193	      __select_impl(
const basic_mask& __k,
const basic_mask& __t,
const basic_mask& __f)
noexcept
1195	        if constexpr (!_S_use_bitmask)
1202	              return __k._M_data < 0 ? __t._M_data : __f._M_data;
1204	              return __k._M_data ? __t._M_data : __f._M_data;
1207	          return (__k._M_data & __t._M_data) | (~__k._M_data & __f._M_data);
// Blend with bool scalars: degenerates to the mask itself or its negation.
1210	      [[__gnu__::__always_inline__]]
1211	      friend constexpr basic_mask
1212	      __select_impl(
const basic_mask& __k, same_as<bool>
auto __t, same_as<bool>
auto __f)
noexcept
1215	          return basic_mask(__t);
1217	        return __t ? __k : !__k;
// Blend producing a vec from two equal scalar-typed values.
1220	      template <__vectorizable _T0, same_as<_T0> _T1>
1221	        requires (
sizeof(_T0) == _Bytes)
1222	        [[__gnu__::__always_inline__]]
1223	        friend constexpr vec<_T0, _S_size>
1224	        __select_impl(
const basic_mask& __k,
const _T0& __t,
const _T1& __f)
noexcept
1226	          if constexpr (_S_is_scalar)
1227	            return __k._M_data ? __t : __f;
1230	              using _Vp = vec<_T0, _S_size>;
1231	              using _Mp =
typename _Vp::mask_type;
1232	              return __select_impl(_Mp(__k), _Vp(__t), _Vp(__f));
// Horizontal reductions.  Partial masks are masked with _S_implicit_mask
// before testing so padding elements cannot influence the result; x86
// vector masks have dedicated runtime helpers.
1237	      [[__gnu__::__always_inline__]]
1239	      _M_all_of() const noexcept
1241	        if constexpr (_S_is_scalar)
1243	        else if constexpr (_S_use_bitmask)
1245	            if constexpr (_S_is_partial)
1247	                return (_M_data & _S_implicit_mask) == _S_implicit_mask;
1249	              return _M_data == _S_implicit_mask;
1252	        else if (!__is_const_known(_M_data))
1253	          return __x86_vecmask_all<_S_size>(_M_data);
1256	          return _VecOps<_DataType, _S_size>::_S_all_of(_M_data);
1259	      [[__gnu__::__always_inline__]]
1261	      _M_any_of() const noexcept
1263	        if constexpr (_S_is_scalar)
1265	        else if constexpr (_S_use_bitmask)
1267	            if constexpr (_S_is_partial)
1269	                return (_M_data & _S_implicit_mask) != 0;
1271	              return _M_data != 0;
1274	        else if (!__is_const_known(_M_data))
1275	          return __x86_vecmask_any<_S_size>(_M_data);
1278	          return _VecOps<_DataType, _S_size>::_S_any_of(_M_data);
1281	      [[__gnu__::__always_inline__]]
1283	      _M_none_of() const noexcept
1285	        if constexpr (_S_is_scalar)
1287	        else if constexpr (_S_use_bitmask)
1289	            if constexpr (_S_is_partial)
1291	                return (_M_data & _S_implicit_mask) == 0;
1293	              return _M_data == 0;
1296	        else if (!__is_const_known(_M_data))
1297	          return __x86_vecmask_none<_S_size>(_M_data);
1300	          return _VecOps<_DataType, _S_size>::_S_none_of(_M_data);
// popcount of the set elements, via the uint/ullong bit pattern.
1303	      [[__gnu__::__always_inline__]]
1304	      constexpr __simd_size_type
1305	      _M_reduce_count() const noexcept
1307	        if constexpr (_S_is_scalar)
1308	          return int(_M_data);
1309	        else if constexpr (_S_size <= numeric_limits<unsigned>::digits)
1310	          return __builtin_popcount(_M_to_uint());
1312	          return __builtin_popcountll(to_ullong());
// Index of the lowest / highest set element; precondition: mask not empty.
1315	      [[__gnu__::__always_inline__]]
1316	      constexpr __simd_size_type
1317	      _M_reduce_min_index()
const
1319	        const auto __bits = _M_to_uint();
1320	        __glibcxx_simd_precondition(__bits,
"An empty mask does not have a min_index.");
1321	        if constexpr (_S_size == 1)
1324	          return __countr_zero(__bits);
1327	      [[__gnu__::__always_inline__]]
1328	      constexpr __simd_size_type
1329	      _M_reduce_max_index()
const
1331	        const auto __bits = _M_to_uint();
1332	        __glibcxx_simd_precondition(__bits,
"An empty mask does not have a max_index.");
1333	        if constexpr (_S_size == 1)
1336	          return __highest_bit(__bits);
// True when the compiler can see the whole mask value at compile time.
1339	      [[__gnu__::__always_inline__]]
1340	      friend constexpr bool
1341	      __is_const_known(
const basic_mask& __x)
1342	      {
return __builtin_constant_p(__x._M_data); }
// Partial specialization of basic_mask for ABIs spanning more than one
// register (_S_nreg > 1).  The mask is represented recursively as two
// halves _M_data0/_M_data1 of sizes _N0 and _N1 covering _Nreg0/_Nreg1
// registers respectively.
// NOTE(review): mangled extraction — fused line numbers; the data-member
// declarations and closing brace are absent from this listing.
1345 template <
size_t _Bytes, __abi_tag _Ap>
1346 requires (_Ap::_S_nreg > 1)
1347 class basic_mask<_Bytes, _Ap>
1348 : public _MaskBase<_Bytes, _Ap>
1350 using _Base = _MaskBase<_Bytes, _Ap>;
1352 using _VecType = _Base::_VecType;
// Grant other instantiations access to _M_data0/_M_data1.
1354 template <
size_t,
typename>
1355 friend class basic_mask;
1357 template <
typename,
typename>
1358 friend class basic_vec;
1360 static constexpr int _S_size = _Ap::_S_size;
// Split point: low half is the largest power of two below _S_size.
1362 static constexpr int _N0 = __bit_ceil(
unsigned(_S_size)) / 2;
1364 static constexpr int _N1 = _S_size - _N0;
// Register split mirrors the element split.
1366 static constexpr int _Nreg0 = __bit_ceil(
unsigned(_Ap::_S_nreg)) / 2;
1368 static constexpr int _Nreg1 = _Ap::_S_nreg - _Nreg0;
1372 using _Abi0 =
decltype(_Ap::template _S_resize<_N0, _Nreg0>());
1374 using _Abi1 =
decltype(_Ap::template _S_resize<_N1, _Nreg1>());
1376 using _Mask0 = basic_mask<_Bytes, _Abi0>;
// The low half is always full and unpadded; only the high half may be
// partial or padded.
1379 static_assert(_Mask0::_S_padding_bytes == 0 && !_Mask0::_S_is_partial);
1381 using _Mask1 = basic_mask<_Bytes, _Abi1>;
1383 static constexpr bool _S_is_partial = _Mask1::_S_is_partial;
1387 static_assert(_Mask0::abi_type::_S_nreg + _Mask1::abi_type::_S_nreg == _Ap::_S_nreg);
1389 static constexpr bool _S_use_bitmask = _Mask0::_S_use_bitmask;
1391 static constexpr bool _S_is_scalar = _Mask0::_S_is_scalar;
1397 static constexpr bool _S_has_bool_member = _Mask1::_S_has_bool_member;
// Bytes of padding between/after the halves caused by alignment of the
// two sub-masks, plus the high half's own padding.
1402 static constexpr size_t _S_padding_bytes
1403 = (__alignof__(_Mask0) == __alignof__(_Mask1)
1404 ? 0 : __alignof__(_Mask0) - (
sizeof(_Mask1) % __alignof__(_Mask0)))
1405 + _Mask1::_S_padding_bytes;
1408 using value_type = bool;
1410 using abi_type = _Ap;
1412 using iterator = _Base::iterator;
1414 using const_iterator = _Base::const_iterator;
// _S_init(lo, hi): build a mask from its two halves.
// NOTE(review): the body of this first overload is absent from this
// mangled listing (fused-number gap 1418 -> 1426).
1416 [[__gnu__::__always_inline__]]
1417 static constexpr basic_mask
1418 _S_init(
const _Mask0& __x,
const _Mask1& __y)
// _S_init(bits): build from a single unsigned integer bit pattern by
// delegating to the unsigned-integral constructor.
1426 [[__gnu__::__always_inline__]]
1427 static constexpr basic_mask
1428 _S_init(unsigned_integral
auto __bits)
1429 {
return basic_mask(__bits); }
// _S_init(pair): build from a pair of per-half representations, each of
// which may itself be an unsigned bit pattern or a nested pair.
1431 template <
typename _U0,
typename _U1>
1432 [[__gnu__::__always_inline__]]
1433 static constexpr basic_mask
1434 _S_init(
const __trivial_pair<_U0, _U1>& __bits)
1436 if constexpr (is_unsigned_v<_U0>)
1438 static_assert(is_unsigned_v<_U1>);
1439 return _S_init(_Mask0(__bits._M_first), _Mask1(__bits._M_second));
1441 else if constexpr (is_unsigned_v<_U1>)
1442 return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1(__bits._M_second));
1444 return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1::_S_init(__bits._M_second));
// Read-only accessors for the two halves (function names are absent from
// this listing — presumably the low/high half getters; confirm upstream).
1447 [[__gnu__::__always_inline__]]
1448 constexpr const _Mask0&
1450 {
return _M_data0; }
1452 [[__gnu__::__always_inline__]]
1453 constexpr const _Mask1&
1455 {
return _M_data1; }
// _S_recursive_bit_cast(__x): reinterpret another mask's storage as this
// mask type.  Falls back to per-half recursion whenever a flat bit_cast
// is unsafe (bool members, padding, or size mismatch).
1457 template <
size_t _UBytes,
typename _UAbi>
1458 [[__gnu__::__always_inline__]]
1459 static constexpr basic_mask
1460 _S_recursive_bit_cast(
const basic_mask<_UBytes, _UAbi>& __x)
1462 using _Mp = basic_mask<_UBytes, _UAbi>;
// Layouts differ: convert each half separately.
1463 if constexpr (_Mp::_S_has_bool_member ||
sizeof(basic_mask) >
sizeof(__x)
1464 || _Mp::_S_padding_bytes != 0)
1465 return _S_init(__builtin_bit_cast(_Mask0, __x._M_data0),
1466 _Mask1::_S_recursive_bit_cast(__x._M_data1));
// Same size: one flat bit_cast suffices.
1467 else if constexpr (
sizeof(basic_mask) ==
sizeof(__x))
1468 return __builtin_bit_cast(basic_mask, __x);
// Source is larger: bit_cast through a padded wrapper aligned like _Mp
// and keep only the leading basic_mask bytes.
1471 struct _Tmp {
alignas(_Mp) basic_mask _M_data; };
1472 return __builtin_bit_cast(_Tmp, __x)._M_data;
// _M_concat_data(): flatten the two halves into one value — a single
// bitmask word when _S_use_bitmask, otherwise a concatenated vector.
// __do_sanitize defaults to clearing padding bits of a partial high half.
1476 [[__gnu__::__always_inline__]]
1478 _M_concat_data(
bool __do_sanitize = _S_is_partial)
const
1480 if constexpr (_S_use_bitmask)
1482 static_assert(_S_size <= numeric_limits<unsigned long long>::digits,
1483 "cannot concat more than 64 bits");
1484 using _Up = _Bitmask<_S_size>;
// Low half occupies bits [0, _N0), high half is shifted above it.
1485 return _Up(_M_data0._M_concat_data() | (_Up(_M_data1._M_concat_data(__do_sanitize)) << _N0));
// Vector representation: zero-pad the high half to the low half's width,
// then concatenate.
1489 auto __lo = _M_data0._M_concat_data();
1490 auto __hi = __vec_zero_pad_to<sizeof(__lo)>(_M_data1._M_concat_data(__do_sanitize));
1491 return __vec_concat(__lo, __hi);
// _S_partial_mask_of_n(n): mask with the first n elements set.
// NOTE(review): mangled extraction — several branch/brace lines absent;
// verify control flow against the upstream header.
1495 template <_ArchTraits _Traits = {}>
1496 [[__gnu__::__always_inline__]]
1497 static constexpr basic_mask
1498 _S_partial_mask_of_n(
int __n)
// Fast path: BMI2 bzhi zeroes all bits above index n in one instruction.
1500#if __has_builtin(__builtin_ia32_bzhi_di)
1501 if constexpr (_S_use_bitmask && _S_size <= 64 && _Traits._M_have_bmi2())
1502 return basic_mask(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size),
unsigned(__n)));
1504 if constexpr (_N0 == 1)
1506 static_assert(_S_size == 2);
1507 return _S_init(_Mask0(
true), _Mask1(
false));
// n fits entirely in the low half: recurse there, high half all-false.
1510 return _S_init(_Mask0::_S_partial_mask_of_n(__n), _Mask1(
false));
1511 else if (__n == _N0 || _N1 == 1)
1512 return _S_init(_Mask0(
true), _Mask1(
false));
// n spills into the high half: low half all-true, recurse on the rest.
1513 else if constexpr (_N1 != 1)
1514 return _S_init(_Mask0(
true), _Mask1::_S_partial_mask_of_n(__n - _N0));
// Pairwise AND of neighboring elements, applied to each half in place.
// (Function name/return lines are absent from this listing.)
1517 [[__gnu__::__always_inline__]]
1518 constexpr basic_mask&
1521 _M_data0._M_and_neighbors();
1522 _M_data1._M_and_neighbors();
// Pairwise OR of neighboring elements, applied to each half in place.
1526 [[__gnu__::__always_inline__]]
1527 constexpr basic_mask&
1530 _M_data0._M_or_neighbors();
1531 _M_data1._M_or_neighbors();
// _M_chunk<_Mp>(): split this mask into _Mp-sized pieces — an array when
// the size divides evenly, otherwise a tuple with a smaller trailing rest.
1535 template <
typename _Mp>
1536 [[__gnu__::__always_inline__]]
1538 _M_chunk() const noexcept
1540 constexpr int __n = _S_size / _Mp::_S_size;
1541 constexpr int __rem = _S_size % _Mp::_S_size;
1542 constexpr auto [...__is] = _IotaArray<__n>;
1543 if constexpr (__rem == 0)
1544 return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>,
1545 _M_data0, _M_data1)...};
1548 using _Rest = resize_t<__rem, _Mp>;
1549 return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, _M_data0, _M_data1)...,
1550 __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, _M_data0, _M_data1));
// _S_concat(x0): single-argument identity case (body absent from this
// mangled listing — presumably returns __x0; confirm upstream).
1554 [[__gnu__::__always_inline__]]
1555 static constexpr basic_mask
1556 _S_concat(
const basic_mask& __x0)
noexcept
// _S_concat(xs...): concatenate two or more smaller masks (same element
// byte width, arbitrary ABIs) into one mask of this type.
1559 template <
typename... _As>
1560 requires (
sizeof...(_As) >= 2)
1561 [[__gnu__::__always_inline__]]
1562 static constexpr basic_mask
1563 _S_concat(
const basic_mask<_Bytes, _As>&... __xs)
noexcept
1565 static_assert(_S_size == (_As::_S_size + ...));
// Extract elements [0, _N0) into the low half and [_N0, _S_size) into
// the high half.
1566 return _S_init(__extract_simd_at<_Mask0>(cw<0>, __xs...),
1567 __extract_simd_at<_Mask1>(cw<_N0>, __xs...));
// Trivial default constructor: elements are left uninitialized.
1571 basic_mask() =
default;
// Broadcast constructor: every element set to the given bool.
1577 [[__gnu__::__always_inline__]]
1579 basic_mask(same_as<bool>
auto __x)
noexcept
1580 : _M_data0(__x), _M_data1(__x)
// Converting constructor from a same-size mask with different element
// byte width / ABI; explicit unless the conversion is considered safe.
// NOTE(review): the member-init lines are absent from this mangled
// listing — the two if/else ladders below appear to be the bodies of
// immediately-invoked lambdas initializing _M_data0 and _M_data1.
1584 template <
size_t _UBytes,
typename _UAbi>
1585 requires (_S_size == _UAbi::_S_size)
1586 [[__gnu__::__always_inline__]]
1587 constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
1588 basic_mask(
const basic_mask<_UBytes, _UAbi>& __x) noexcept
// Low half: reuse the source's low half when it is also split,
// otherwise extract the first _N0 elements.
1590 if constexpr (_UAbi::_S_nreg > 1)
1592 return __x._M_data0;
1594 else if constexpr (_N0 == 1)
1595 return _Mask0(__x[0]);
1597 return get<0>(chunk<_N0>(__x));
// High half: analogous, from element _N0 onward.
1600 if constexpr (_UAbi::_S_nreg > 1)
1602 return __x._M_data1;
1604 else if constexpr (_N1 == 1)
1605 return _Mask1(__x[_N0]);
1607 return get<1>(chunk<_N0>(__x));
1611 using _Base::_MaskBase;
// Generator constructor: element i is __gen(i); the high half shifts the
// index by _N0 so the generator sees global element indices.
1614 template <__simd_generator_invokable<
bool, _S_size> _Fp>
1615 [[__gnu__::__always_inline__]]
1617 basic_mask(_Fp&& __gen)
1618 : _M_data0(__gen), _M_data1([&] [[__gnu__::__always_inline__]] (auto __i) {
1619 return __gen(__simd_size_c<__i + _N0>);
// bitset constructor: split the bitset at _N0 and build each half.
1624 [[__gnu__::__always_inline__]]
1626 basic_mask(
const same_as<bitset<_S_size>>
auto& __b)
noexcept
1627 : _M_data0(__bitset_split<_N0>(__b)._M_lo), _M_data1(__bitset_split<_N0>(__b)._M_hi)
// Unsigned-integral constructor: low _N0 bits go to the low half; the
// high half gets the shifted remainder, or all-zero when the source type
// has no bits above _N0 (guards against an out-of-range shift).
1631 template <
unsigned_
integral _Tp>
1632 requires (!same_as<_Tp, bool>)
1633 [[__gnu__::__always_inline__]]
1635 basic_mask(_Tp __val) noexcept
1636 : _M_data0(
static_cast<_Bitmask<_N0>
>(__val)),
1637 _M_data1(
sizeof(_Tp) * __CHAR_BIT__ > _N0
1638 ?
static_cast<_Bitmask<_N1>
>(__val >> _N0) : _Bitmask<_N1>())
// operator[]: read element __i.  Uses the per-half path when the index is
// a compile-time constant or the representation forbids aliasing reads;
// otherwise reads the element directly from this object's bytes.
1642 [[__gnu__::__always_inline__]]
1643 constexpr value_type
1644 operator[](__simd_size_type __i)
const
1646 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size,
"subscript is out of bounds");
1647 if (__is_const_known(__i))
1648 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
1649 else if constexpr (_M_data1._S_has_bool_member)
1653 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
// Bitmask representation: extract bit __i from this object's storage via
// a may_alias byte pointer.
1654 else if constexpr (abi_type::_S_is_bitmask)
1656 using _AliasingByte [[__gnu__::__may_alias__]] =
unsigned char;
1657 return bool((
reinterpret_cast<const _AliasingByte*
>(
this)
1658 [__i / __CHAR_BIT__] >> (__i % __CHAR_BIT__)) & 1);
// Vector representation: each element is a _Bytes-wide integer lane;
// nonzero means true.
1662 using _AliasingInt [[__gnu__::__may_alias__]] = __integer_from<_Bytes>;
1663 return reinterpret_cast<const _AliasingInt*
>(
this)[__i] != 0;
// Element-wise logical NOT, applied per half.
1668 [[__gnu__::__always_inline__]]
1669 constexpr basic_mask
1670 operator!() const noexcept
1671 {
return _S_init(!_M_data0, !_M_data1); }
// Unary +: convert to the associated vector type (per-half, concatenated).
1673 [[__gnu__::__always_inline__]]
1675 operator+() const noexcept requires destructible<_VecType>
1676 {
return _VecType::_S_concat(+_M_data0, +_M_data1); }
// Unary -: vector of 0 / -1 per element.
1681 [[__gnu__::__always_inline__]]
1683 operator-() const noexcept requires destructible<_VecType>
1684 {
return _VecType::_S_concat(-_M_data0, -_M_data1); }
// Unary ~ when a vector type exists; deleted otherwise (below).
1689 [[__gnu__::__always_inline__]]
1691 operator~() const noexcept requires destructible<_VecType>
1692 {
return _VecType::_S_concat(~_M_data0, ~_M_data1); }
1695 operator~() const noexcept = delete;
// Conversion to a same-size basic_vec; explicit when the element byte
// width differs from the mask's.
1698 template <typename _Up, typename _UAbi>
1699 requires (_UAbi::_S_size == _S_size)
1700 [[__gnu__::__always_inline__]]
1701 constexpr explicit(sizeof(_Up) != _Bytes)
1702 operator basic_vec<_Up, _UAbi>() const noexcept
1704 using _Rp = basic_vec<_Up, _UAbi>;
1705 return _Rp::_S_init(
static_cast<_Rp::_DataType0
>(_M_data0),
1706 static_cast<_Rp::_DataType1
>(_M_data1));
1709 using _Base::operator basic_vec;
// to_bitset(): convert the mask to std::bitset<_S_size>.
// NOTE(review): the small-size branch body and the declaration of the
// aggregate __tmp are absent from this mangled listing.
1712 [[__gnu__::__always_inline__]]
1713 constexpr bitset<_S_size>
1714 to_bitset() const noexcept
1716 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
// Large masks: lay the two halves' bitsets out adjacently and bit_cast.
1725 } __tmp = {_M_data0.to_bitset(), _M_data1.to_bitset()};
1726 return __builtin_bit_cast(bitset<_S_size>, __tmp);
// _M_to_uint<_Offset>(): bit representation shifted left by _Offset.
// (Function name/return-type lines are absent from this listing.)
1730 template <
int _Offset = 0, _ArchTraits _Traits = {}>
1731 [[__gnu__::__always_inline__]]
1735 constexpr int _N0x = _N0;
// Oversized masks return a pair of per-half words instead of one word.
1738 static_assert(_Offset == 0);
1739 return __trivial_pair {
1740 _M_data0.template _M_to_uint<0>(),
1741 _M_data1.template _M_to_uint<0>()
// 2-byte elements without BMI2: going through a char mask is cheaper.
1747 if constexpr (_Bytes == 2 && !_Traits._M_have_bmi2() && _Ap::_S_nreg == 2
1749 return __similar_mask<char, _S_size, _Ap>(*this).template _M_to_uint<_Offset>();
// General case: high half's bits land above the low half's.
1751 auto __uint = _M_data1.template _M_to_uint<_N0x + _Offset>();
1752 __uint |= _M_data0.template _M_to_uint<_Offset>();
// to_ullong(): the mask as an unsigned long long; precondition that no
// element beyond 64 is set when the mask is wider than a ullong.
1757 [[__gnu__::__always_inline__]]
1758 constexpr unsigned long long
1761 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1762 return _M_to_uint();
1765 __glibcxx_simd_precondition(_M_data1.to_ullong() == 0,
1766 "to_ullong called on mask with 'true' elements at indices"
1767 "higher than representable in a ullong");
1768 return _M_data0.to_ullong();
1773 [[__gnu__::__always_inline__]]
1774 friend constexpr basic_mask
1775 operator&&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1776 {
return _S_init(__x._M_data0 && __y._M_data0, __x._M_data1 && __y._M_data1); }
1778 [[__gnu__::__always_inline__]]
1779 friend constexpr basic_mask
1780 operator||(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1781 {
return _S_init(__x._M_data0 || __y._M_data0, __x._M_data1 || __y._M_data1); }
1783 [[__gnu__::__always_inline__]]
1784 friend constexpr basic_mask
1785 operator&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1786 {
return _S_init(__x._M_data0 & __y._M_data0, __x._M_data1 & __y._M_data1); }
1788 [[__gnu__::__always_inline__]]
1789 friend constexpr basic_mask
1790 operator|(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1791 {
return _S_init(__x._M_data0 | __y._M_data0, __x._M_data1 | __y._M_data1); }
1793 [[__gnu__::__always_inline__]]
1794 friend constexpr basic_mask
1795 operator^(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1796 {
return _S_init(__x._M_data0 ^ __y._M_data0, __x._M_data1 ^ __y._M_data1); }
// Compound assignment: update each half of __x in place.
// NOTE(review): the `return __x;` lines are absent from this mangled
// listing (fused-number gaps) — confirm against the upstream header.
1799 [[__gnu__::__always_inline__]]
1800 friend constexpr basic_mask&
1801 operator&=(basic_mask& __x,
const basic_mask& __y)
noexcept
1803 __x._M_data0 &= __y._M_data0;
1804 __x._M_data1 &= __y._M_data1;
1808 [[__gnu__::__always_inline__]]
1809 friend constexpr basic_mask&
1810 operator|=(basic_mask& __x,
const basic_mask& __y)
noexcept
1812 __x._M_data0 |= __y._M_data0;
1813 __x._M_data1 |= __y._M_data1;
1817 [[__gnu__::__always_inline__]]
1818 friend constexpr basic_mask&
1819 operator^=(basic_mask& __x,
const basic_mask& __y)
noexcept
1821 __x._M_data0 ^= __y._M_data0;
1822 __x._M_data1 ^= __y._M_data1;
1827 [[__gnu__::__always_inline__]]
1828 friend constexpr basic_mask
1829 operator==(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1830 {
return !(__x ^ __y); }
1832 [[__gnu__::__always_inline__]]
1833 friend constexpr basic_mask
1834 operator!=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1835 {
return __x ^ __y; }
1837 [[__gnu__::__always_inline__]]
1838 friend constexpr basic_mask
1839 operator>=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1840 {
return __x || !__y; }
1842 [[__gnu__::__always_inline__]]
1843 friend constexpr basic_mask
1844 operator<=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1845 {
return !__x || __y; }
1847 [[__gnu__::__always_inline__]]
1848 friend constexpr basic_mask
1849 operator>(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1850 {
return __x && !__y; }
1852 [[__gnu__::__always_inline__]]
1853 friend constexpr basic_mask
1854 operator<(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1855 {
return !__x && __y; }
// __select_impl(k, t, f): per-element select — element i of the result is
// t[i] where k[i] is set, else f[i].  Recurses into the two halves.
1858 [[__gnu__::__always_inline__]]
1859 friend constexpr basic_mask
1860 __select_impl(
const basic_mask& __k,
const basic_mask& __t,
const basic_mask& __f)
noexcept
1862 return _S_init(__select_impl(__k._M_data0, __t._M_data0, __f._M_data0),
1863 __select_impl(__k._M_data1, __t._M_data1, __f._M_data1));
// Overload for bool alternatives.
// NOTE(review): the condition line guarding the first return (presumably
// `if (__t == __f)`) is absent from this mangled listing — confirm.
1866 [[__gnu__::__always_inline__]]
1867 friend constexpr basic_mask
1868 __select_impl(
const basic_mask& __k, same_as<bool>
auto __t, same_as<bool>
auto __f)
noexcept
1871 return basic_mask(__t);
// t==true/f==false selects k itself; the inverse selects !k.
1873 return __t ? __k : !__k;
// Overload for vectorizable scalar alternatives of matching byte width:
// produces a vec broadcast-selected by the mask.
1876 template <__vectorizable _T0, same_as<_T0> _T1>
1877 requires (
sizeof(_T0) == _Bytes)
1878 [[__gnu__::__always_inline__]]
1879 friend constexpr vec<_T0, _S_size>
1880 __select_impl(
const basic_mask& __k,
const _T0& __t,
const _T1& __f)
noexcept
1882 using _Vp = vec<_T0, _S_size>;
// If this mask is not _Vp's native mask type, convert and retry.
1883 if constexpr (!is_same_v<basic_mask, typename _Vp::mask_type>)
1884 return __select_impl(
static_cast<_Vp::mask_type
>(__k), __t, __f);
1886 return _Vp::_S_init(__select_impl(__k._M_data0, __t, __f),
1887 __select_impl(__k._M_data1, __t, __f));
// Whole-mask reductions for the split representation.  When the halves
// are the same size, combining them first needs only one reduction.
// NOTE(review): the declaration lines (return type and names — by their
// bodies these are _M_all_of, _M_any_of, _M_none_of) are absent from this
// mangled listing.
1890 template <_ArchTraits _Traits = {}>
1891 [[__gnu__::__always_inline__]]
1895 if constexpr (_N0 == _N1)
1896 return (_M_data0 && _M_data1)._M_all_of();
1898 return _M_data0._M_all_of() && _M_data1._M_all_of();
1901 template <_ArchTraits _Traits = {}>
1902 [[__gnu__::__always_inline__]]
1906 if constexpr (_N0 == _N1)
1907 return (_M_data0 || _M_data1)._M_any_of();
1909 return _M_data0._M_any_of() || _M_data1._M_any_of();
1912 template <_ArchTraits _Traits = {}>
1913 [[__gnu__::__always_inline__]]
1917 if constexpr (_N0 == _N1)
1918 return (_M_data0 || _M_data1)._M_none_of();
1920 return _M_data0._M_none_of() && _M_data1._M_none_of();
// _M_reduce_min_index(): index of the lowest set element.  Masks that fit
// in 64 bits use a single trailing-zero count; wider masks recurse into
// the half that contains a set element.  Precondition: mask not empty.
1923 [[__gnu__::__always_inline__]]
1924 constexpr __simd_size_type
1925 _M_reduce_min_index()
const
1927 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1929 const auto __bits = _M_to_uint();
1930 __glibcxx_simd_precondition(__bits,
"An empty mask does not have a min_index.");
1931 if constexpr (_S_size == 1)
1934 return __countr_zero(_M_to_uint());
// Low half empty: answer lies in the high half, offset by _N0.
1936 else if (_M_data0._M_none_of())
1937 return _M_data1._M_reduce_min_index() + _N0;
1939 return _M_data0._M_reduce_min_index();
// _M_reduce_max_index(): index of the highest set element; mirror image
// of the above using the highest-bit position.
1942 [[__gnu__::__always_inline__]]
1943 constexpr __simd_size_type
1944 _M_reduce_max_index()
const
1946 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1948 const auto __bits = _M_to_uint();
1949 __glibcxx_simd_precondition(__bits,
"An empty mask does not have a max_index.");
1950 if constexpr (_S_size == 1)
1953 return __highest_bit(_M_to_uint());
// High half empty: answer lies entirely in the low half.
1955 else if (_M_data1._M_none_of())
1956 return _M_data0._M_reduce_max_index();
1958 return _M_data1._M_reduce_max_index() + _N0;
1961 [[__gnu__::__always_inline__]]
1962 friend constexpr bool
1963 __is_const_known(
const basic_mask& __x)
1964 {
return __is_const_known(__x._M_data0) && __is_const_known(__x._M_data1); }
1967_GLIBCXX_END_NAMESPACE_VERSION
1970#pragma GCC diagnostic pop
constexpr bool operator<=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator>=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator<(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator>(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
_Tp * end(valarray< _Tp > &__va) noexcept
Return an iterator pointing to one past the last element of the valarray.
_Tp * begin(valarray< _Tp > &__va) noexcept
Return an iterator pointing to the first element of the valarray.
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
ISO C++ entities toplevel namespace is std.
constexpr auto cend(const _Container &__cont) noexcept(noexcept(std::end(__cont))) -> decltype(std::end(__cont))
Return an iterator pointing to one past the last element of the const container.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
constexpr bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
constexpr auto cbegin(const _Container &__cont) noexcept(noexcept(std::begin(__cont))) -> decltype(std::begin(__cont))
Return an iterator pointing to the first element of the const container.
constexpr bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
constexpr bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
static constexpr int digits
static constexpr _Tp max() noexcept
static constexpr _Tp min() noexcept