25#ifndef _GLIBCXX_SIMD_VEC_H
26#define _GLIBCXX_SIMD_VEC_H 1
29#pragma GCC system_header
32#if __cplusplus >= 202400L
35#include "simd_flags.h"
42#pragma GCC diagnostic push
43#pragma GCC diagnostic ignored "-Wpsabi"
45namespace std _GLIBCXX_VISIBILITY(default)
47_GLIBCXX_BEGIN_NAMESPACE_VERSION
51 template <
typename _Tp,
typename _Ap>
55 using value_type = _Tp;
59 using mask_type = basic_mask<0, void>;
61#define _GLIBCXX_DELETE_SIMD "This specialization is disabled because of an invalid combination " \
62 "of template arguments to basic_vec."
64 basic_vec() =
delete(_GLIBCXX_DELETE_SIMD);
66 ~basic_vec() =
delete(_GLIBCXX_DELETE_SIMD);
68 basic_vec(
const basic_vec&) =
delete(_GLIBCXX_DELETE_SIMD);
70 basic_vec& operator=(
const basic_vec&) =
delete(_GLIBCXX_DELETE_SIMD);
72#undef _GLIBCXX_DELETE_SIMD
75 template <
typename _Tp,
typename _Ap>
78 using _Vp = basic_vec<_Tp, _Ap>;
81 using value_type = _Tp;
85 using mask_type = basic_mask<
sizeof(_Tp), abi_type>;
87 using iterator = __iterator<_Vp>;
89 using const_iterator = __iterator<const _Vp>;
93 {
return {
static_cast<_Vp&
>(*this), 0}; }
95 constexpr const_iterator
96 begin() const noexcept
99 constexpr const_iterator
100 cbegin() const noexcept
101 {
return {
static_cast<const _Vp&
>(*this), 0}; }
103 constexpr default_sentinel_t
107 constexpr default_sentinel_t
108 cend() const noexcept
111 static constexpr auto size = __simd_size_c<_Ap::_S_size>;
113 _VecBase() =
default;
116 template <
typename _Up,
typename _UAbi>
117 requires (_Ap::_S_size != _UAbi::_S_size)
118 _VecBase(
const basic_vec<_Up, _UAbi>&) =
delete(
"size mismatch");
120 template <
typename _Up,
typename _UAbi>
121 requires (_Ap::_S_size == _UAbi::_S_size) && (!__explicitly_convertible_to<_Up, _Tp>)
123 _VecBase(
const basic_vec<_Up, _UAbi>&)
124 =
delete(
"the value types are not convertible");
126 [[__gnu__::__always_inline__]]
128 operator+(
const _Vp& __x,
const _Vp& __y)
noexcept
135 [[__gnu__::__always_inline__]]
137 operator-(
const _Vp& __x,
const _Vp& __y)
noexcept
144 [[__gnu__::__always_inline__]]
146 operator*(
const _Vp& __x,
const _Vp& __y)
noexcept
153 [[__gnu__::__always_inline__]]
155 operator/(
const _Vp& __x,
const _Vp& __y)
noexcept
162 [[__gnu__::__always_inline__]]
164 operator%(
const _Vp& __x,
const _Vp& __y)
noexcept
165 requires requires (_Tp __a) { __a % __a; }
172 [[__gnu__::__always_inline__]]
174 operator&(
const _Vp& __x,
const _Vp& __y)
noexcept
175 requires requires (_Tp __a) { __a & __a; }
182 [[__gnu__::__always_inline__]]
184 operator|(
const _Vp& __x,
const _Vp& __y)
noexcept
185 requires requires (_Tp __a) { __a | __a; }
192 [[__gnu__::__always_inline__]]
194 operator^(
const _Vp& __x,
const _Vp& __y)
noexcept
195 requires requires (_Tp __a) { __a ^ __a; }
202 [[__gnu__::__always_inline__]]
204 operator<<(
const _Vp& __x,
const _Vp& __y) _GLIBCXX_SIMD_NOEXCEPT
205 requires requires (_Tp __a) { __a << __a; }
212 [[__gnu__::__always_inline__]]
214 operator<<(
const _Vp& __x, __simd_size_type __y) _GLIBCXX_SIMD_NOEXCEPT
215 requires requires (_Tp __a, __simd_size_type __b) { __a << __b; }
222 [[__gnu__::__always_inline__]]
224 operator>>(
const _Vp& __x,
const _Vp& __y) _GLIBCXX_SIMD_NOEXCEPT
225 requires requires (_Tp __a) { __a >> __a; }
232 [[__gnu__::__always_inline__]]
234 operator>>(
const _Vp& __x, __simd_size_type __y) _GLIBCXX_SIMD_NOEXCEPT
235 requires requires (_Tp __a, __simd_size_type __b) { __a >> __b; }
246 template <
integral _Tp>
247 inline constexpr _Tp __max_shift
248 = (
sizeof(_Tp) <
sizeof(int) ?
sizeof(int) :
sizeof(_Tp)) * __CHAR_BIT__;
250 template <__vectorizable _Tp, __abi_tag _Ap>
251 requires (_Ap::_S_nreg == 1)
252 class basic_vec<_Tp, _Ap>
253 : public _VecBase<_Tp, _Ap>
255 template <
typename,
typename>
256 friend class basic_vec;
258 template <
size_t,
typename>
259 friend class basic_mask;
261 static constexpr int _S_size = _Ap::_S_size;
263 static constexpr int _S_full_size = __bit_ceil(
unsigned(_S_size));
265 static constexpr bool _S_is_scalar = _S_size == 1;
267 static constexpr bool _S_use_bitmask = _Ap::_S_is_bitmask && !_S_is_scalar;
269 using _DataType =
typename _Ap::template _DataType<_Tp>;
279 static constexpr bool _S_is_partial =
sizeof(_M_data) >
sizeof(_Tp) * _S_size;
281 using __canon_value_type = __canonical_vec_type_t<_Tp>;
284 using value_type = _Tp;
286 using mask_type = _VecBase<_Tp, _Ap>::mask_type;
289 [[__gnu__::__always_inline__]]
290 static constexpr basic_vec
291 _S_init(_DataType __x)
298 [[__gnu__::__always_inline__]]
299 constexpr const _DataType&
303 [[__gnu__::__always_inline__]]
304 friend constexpr bool
305 __is_const_known(
const basic_vec& __x)
306 {
return __builtin_constant_p(__x._M_data); }
308 [[__gnu__::__always_inline__]]
310 _M_concat_data([[maybe_unused]]
bool __do_sanitize =
false)
const
312 if constexpr (_S_is_scalar)
313 return __vec_builtin_type<__canon_value_type, 1>{_M_data};
318 template <
int _Size = _S_size,
int _Offset = 0,
typename _A0,
typename _Fp>
319 [[__gnu__::__always_inline__]]
320 static constexpr basic_vec
321 _S_static_permute(
const basic_vec<value_type, _A0>& __x, _Fp&& __idxmap)
323 using _Xp = basic_vec<value_type, _A0>;
325 if constexpr (_S_is_scalar)
327 constexpr __simd_size_type __j = [&]
consteval {
328 if constexpr (__index_permutation_function_sized<_Fp>)
329 return __idxmap(_Offset, _Size);
331 return __idxmap(_Offset);
333 if constexpr (__j == simd::zero_element || __j == simd::uninit_element)
336 static_assert(__j >= 0 && __j < _Xp::_S_size);
337 __r._M_data = __x[__j];
341 auto __idxmap2 = [=](
auto __i)
consteval {
342 if constexpr (int(__i + _Offset) >= _Size)
343 return __simd_size_c<simd::uninit_element>;
344 else if constexpr (__index_permutation_function_sized<_Fp>)
345 return __simd_size_c<__idxmap(__i + _Offset, _Size)>;
347 return __simd_size_c<__idxmap(__i + _Offset)>;
349 constexpr auto __adj_idx = [](
auto __i) {
350 constexpr int __j = __i;
351 if constexpr (__j == simd::zero_element)
352 return __simd_size_c<__bit_ceil(
unsigned(_Xp::_S_size))>;
353 else if constexpr (__j == simd::uninit_element)
354 return __simd_size_c<-1>;
357 static_assert(__j >= 0 && __j < _Xp::_S_size);
358 return __simd_size_c<__j>;
361 constexpr auto [...__is0] = _IotaArray<_S_size>;
362 constexpr bool __needs_zero_element
363 = ((__idxmap2(__simd_size_c<__is0>).value == simd::zero_element) || ...);
364 constexpr auto [...__is_full] = _IotaArray<_S_full_size>;
365 if constexpr (_A0::_S_nreg == 2 && !__needs_zero_element)
367 __r._M_data = __builtin_shufflevector(
368 __x._M_data0._M_data, __x._M_data1._M_data,
369 __adj_idx(__idxmap2(__simd_size_c<__is_full>)).value...);
373 __r._M_data = __builtin_shufflevector(
374 __x._M_concat_data(),
decltype(__x._M_concat_data())(),
375 __adj_idx(__idxmap2(__simd_size_c<__is_full>)).value...);
381 template <
typename _Vp>
382 [[__gnu__::__always_inline__]]
384 _M_chunk() const noexcept
386 constexpr int __n = _S_size / _Vp::_S_size;
387 constexpr int __rem = _S_size % _Vp::_S_size;
388 constexpr auto [...__is] = _IotaArray<__n>;
389 if constexpr (__rem == 0)
390 return array<_Vp, __n> {__extract_simd_at<_Vp>(cw<_Vp::_S_size * __is>, *
this)...};
393 using _Rest = resize_t<__rem, _Vp>;
394 return tuple(__extract_simd_at<_Vp>(cw<_Vp::_S_size * __is>, *
this)...,
395 __extract_simd_at<_Rest>(cw<_Vp::_S_size * __n>, *
this));
399 [[__gnu__::__always_inline__]]
400 static constexpr basic_vec
401 _S_concat(
const basic_vec& __x0)
noexcept
404 template <
typename... _As>
405 requires (
sizeof...(_As) > 1)
406 [[__gnu__::__always_inline__]]
407 static constexpr basic_vec
408 _S_concat(
const basic_vec<value_type, _As>&... __xs)
noexcept
410 static_assert(_S_size == (_As::_S_size + ...));
411 return __extract_simd_at<basic_vec>(cw<0>, __xs...);
438 template <
int _Shift, _ArchTraits _Traits = {}>
439 [[__gnu__::__always_inline__]]
441 _M_elements_shifted_to_front()
const
443 static_assert(_Shift < _S_size && -_Shift < _S_size);
444 if constexpr (_Shift == 0)
447 else if (!__is_const_known(*
this))
449 if constexpr (
sizeof(_M_data) == 16 && _Shift > 0)
450 return reinterpret_cast<_DataType
>(
451 __builtin_ia32_psrldqi128(__vec_bit_cast<long long>(_M_data),
452 _Shift *
sizeof(value_type) * 8));
453 else if constexpr (
sizeof(_M_data) == 16 && _Shift < 0)
454 return reinterpret_cast<_DataType
>(
455 __builtin_ia32_pslldqi128(__vec_bit_cast<long long>(_M_data),
456 -_Shift *
sizeof(value_type) * 8));
457 else if constexpr (
sizeof(_M_data) < 16)
459 auto __x =
reinterpret_cast<__vec_builtin_type_bytes<long long, 16>
>(
460 __vec_zero_pad_to_16(_M_data));
461 if constexpr (_Shift > 0)
462 __x = __builtin_ia32_psrldqi128(__x, _Shift *
sizeof(value_type) * 8);
464 __x = __builtin_ia32_pslldqi128(__x, -_Shift *
sizeof(value_type) * 8);
465 return _VecOps<_DataType>::_S_extract(__vec_bit_cast<__canon_value_type>(__x));
469 return _S_static_permute(*
this, [](
int __i)
consteval {
470 int __off = __i + _Shift;
471 return __off >= _S_size || __off < 0 ? zero_element : __off;
481 template <
typename _Vp, __canon_value_type __
id>
482 [[__gnu__::__always_inline__]]
484 _M_pad_to_T_with_value() const noexcept
486 static_assert(!_Vp::_S_is_partial);
487 static_assert(_Ap::_S_nreg == 1);
488 if constexpr (
sizeof(_Vp) == 32)
490 static_assert(
sizeof(_M_data) == 32);
491 constexpr auto __k = _Vp::mask_type::_S_partial_mask_of_n(_S_size);
492 return __select_impl(__k, _Vp::_S_init(_M_data), __id);
496 static_assert(
sizeof(_Vp) <= 16);
497 static_assert(
sizeof(_M_data) <=
sizeof(_Vp));
498 _Vp __v1 = __vec_zero_pad_to<sizeof(_Vp)>(_M_data);
499 if constexpr (__id == 0 && _S_is_partial)
503 __v1 = __v1.template _M_elements_shifted_to_front<-(_Vp::_S_size - _S_size)>();
504 else if constexpr (_Vp::_S_size - _S_size == 1)
506 __vec_set(__v1._M_data, _Vp::_S_size - 1, __id);
507 else if constexpr (__has_single_bit(
unsigned(_Vp::_S_size - _S_size)))
509 constexpr int __n = _Vp::_S_size - _S_size;
510 using _Ip = __integer_from<__n *
sizeof(__canon_value_type)>;
511 constexpr auto [...__is] = _IotaArray<__n>;
512 constexpr __canon_value_type __idn[__n] = {((void)__is, __id)...};
513 auto __vn = __vec_bit_cast<_Ip>(__v1._M_data);
514 __vec_set(__vn, _Vp::_S_size / __n - 1, __builtin_bit_cast(_Ip, __idn));
515 __v1._M_data =
reinterpret_cast<typename _Vp::_DataType
>(__vn);
517 else if constexpr (__id != 0 && !_S_is_partial)
521 constexpr _Vp __idn([](
int __i) {
522 return __i >= _S_size ? __id : __canon_value_type();
524 __v1._M_data = __vec_or(__v1._M_data, __idn._M_data);
526 else if constexpr (__id != 0 || _S_is_partial)
528 constexpr auto __k = _Vp::mask_type::_S_partial_mask_of_n(_S_size);
529 __v1 = __select_impl(__k, __v1, __id);
535 [[__gnu__::__always_inline__]]
537 _M_reduce_to_half(
auto __binary_op)
const
539 static_assert(__has_single_bit(
unsigned(_S_size)));
540 auto [__a, __b] = chunk<_S_size / 2>(*this);
541 return __binary_op(__a, __b);
544 template <
typename _Rest,
typename _BinaryOp>
545 [[__gnu__::__always_inline__]]
547 _M_reduce_tail(
const _Rest& __rest, _BinaryOp __binary_op)
const
549 if constexpr (_S_is_scalar)
550 return __binary_op(*
this, __rest)._M_data;
551 else if constexpr (_Rest::_S_size == _S_size)
552 return __binary_op(*
this, __rest)._M_reduce(__binary_op);
553 else if constexpr (_Rest::_S_size > _S_size)
555 auto [__a, __b] = __rest.template _M_chunk<basic_vec>();
556 return __binary_op(*
this, __a)._M_reduce_tail(__b, __binary_op);
558 else if constexpr (_Rest::_S_size == 1)
559 return __binary_op(_Rest(_M_reduce(__binary_op)), __rest)[0];
560 else if constexpr (
sizeof(_M_data) <= 16
561 &&
requires { __default_identity_element<__canon_value_type, _BinaryOp>(); })
563 constexpr __canon_value_type __id
564 = __default_identity_element<__canon_value_type, _BinaryOp>();
565 return __binary_op(_M_data, __rest.template _M_pad_to_T_with_value<basic_vec, __id>())
566 ._M_reduce(__binary_op);
569 return _M_reduce_to_half(__binary_op)._M_reduce_tail(__rest, __binary_op);
578 template <
typename _BinaryOp, _ArchTraits _Traits = {}>
579 [[__gnu__::__always_inline__]]
581 _M_reduce(_BinaryOp __binary_op)
const
583 constexpr bool __have_id_elem
584 =
requires { __default_identity_element<__canon_value_type, _BinaryOp>(); };
585 if constexpr (_S_size == 1)
586 return operator[](0);
587 else if constexpr (_Traits.template _M_eval_as_f32<value_type>()
588 && (is_same_v<_BinaryOp, plus<>>
589 || is_same_v<_BinaryOp, multiplies<>>))
590 return value_type(rebind_t<float, basic_vec>(*this)._M_reduce(__binary_op));
592 else if constexpr (is_integral_v<value_type> &&
sizeof(value_type) == 1
593 && is_same_v<
decltype(__binary_op), multiplies<>>)
608 if constexpr (!_S_is_partial)
610 using _V16 = resize_t<_S_size / 2, rebind_t<unsigned short, basic_vec>>;
611 auto __a = __builtin_bit_cast(_V16, *
this);
612 return __binary_op(__a, __a >> 8)._M_reduce(__binary_op);
616 using _V16 = rebind_t<unsigned short, basic_vec>;
617 return _V16(*this)._M_reduce(__binary_op);
621 else if constexpr (__has_single_bit(
unsigned(_S_size)))
623 if constexpr (
sizeof(_M_data) > 16)
624 return _M_reduce_to_half(__binary_op)._M_reduce(__binary_op);
625 else if constexpr (_S_size == 2)
626 return _M_reduce_to_half(__binary_op)[0];
629 static_assert(_S_size <= 16);
632 if constexpr (
sizeof(_M_data) <= 16 && is_integral_v<value_type>)
634 if constexpr (_S_size > 8)
635 __x = __binary_op(__x, __x.template _M_elements_shifted_to_front<8>());
636 if constexpr (_S_size > 4)
637 __x = __binary_op(__x, __x.template _M_elements_shifted_to_front<4>());
638 if constexpr (_S_size > 2)
639 __x = __binary_op(__x, __x.template _M_elements_shifted_to_front<2>());
642 return __binary_op(__x, __x.template _M_elements_shifted_to_front<1>())[0];
645 if constexpr (_S_size > 8)
646 __x = __binary_op(__x, _S_static_permute(__x, _SwapNeighbors<8>()));
647 if constexpr (_S_size > 4)
648 __x = __binary_op(__x, _S_static_permute(__x, _SwapNeighbors<4>()));
651 if constexpr (is_integral_v<value_type> &&
sizeof(value_type) <= 1)
652 return value_type(resize_t<4, rebind_t<int, basic_vec>>(chunk<4>(__x)[0])
653 ._M_reduce(__binary_op));
655 if constexpr (_S_size > 2)
656 __x = __binary_op(__x, _S_static_permute(__x, _SwapNeighbors<2>()));
657 if constexpr (is_integral_v<value_type> &&
sizeof(value_type) == 2)
658 return __binary_op(__x, _S_static_permute(__x, _SwapNeighbors<1>()))[0];
660 return __binary_op(vec<value_type, 1>(__x[0]), vec<value_type, 1>(__x[1]))[0];
663 else if constexpr (
sizeof(_M_data) == 32)
665 const auto [__lo, __hi] = chunk<__bit_floor(unsigned(_S_size))>(*
this);
666 return __lo._M_reduce_tail(__hi, __binary_op);
668 else if constexpr (
sizeof(_M_data) == 64)
672 auto __chunked = chunk<__bit_floor(
unsigned(_S_size)) / 2>(*this);
673 using _Cp =
decltype(__chunked);
674 if constexpr (tuple_size_v<_Cp> == 4)
676 const auto& [__a, __b, __c, __rest] = __chunked;
677 constexpr bool __amd_cpu = _Traits._M_have_sse4a();
678 if constexpr (__have_id_elem && __rest._S_size > 1 && __amd_cpu)
684 const auto& [__a, __rest] = chunk<__bit_floor(unsigned(_S_size))>(*
this);
685 using _Vp = remove_cvref_t<
decltype(__a)>;
686 constexpr __canon_value_type __id
687 = __default_identity_element<__canon_value_type, _BinaryOp>();
688 const _Vp __b = __rest.template _M_pad_to_T_with_value<_Vp, __id>();
689 return __binary_op(__a, __b)._M_reduce(__binary_op);
691 else if constexpr (__have_id_elem && __rest._S_size > 1)
697 using _Vp = remove_cvref_t<
decltype(__a)>;
698 constexpr __canon_value_type __id
699 = __default_identity_element<__canon_value_type, _BinaryOp>();
700 const _Vp __d = __rest.template _M_pad_to_T_with_value<_Vp, __id>();
701 return __binary_op(__binary_op(__a, __b), __binary_op(__c, __d))
702 ._M_reduce(__binary_op);
705 return __binary_op(__binary_op(__a, __b), __c)
706 ._M_reduce_tail(__rest, __binary_op);
708 else if constexpr (tuple_size_v<_Cp> == 3)
710 const auto& [__a, __b, __rest] = __chunked;
711 return __binary_op(__a, __b)._M_reduce_tail(__rest, __binary_op);
714 static_assert(
false);
716 else if constexpr (__have_id_elem)
718 constexpr __canon_value_type __id
719 = __default_identity_element<__canon_value_type, _BinaryOp>();
720 using _Vp = resize_t<__bit_ceil(
unsigned(_S_size)), basic_vec>;
721 return _M_pad_to_T_with_value<_Vp, __id>()._M_reduce(__binary_op);
725 const auto& [__a, __rest] = chunk<__bit_floor(unsigned(_S_size))>(*
this);
726 return __a._M_reduce_tail(__rest, __binary_op);
735 template <_OptTraits _Traits = {}>
736 [[__gnu__::__always_inline__]]
738 _M_isnan() const requires is_floating_point_v<value_type>
740 if constexpr (_Traits._M_finite_math_only())
741 return mask_type(
false);
742 else if constexpr (_S_is_scalar)
743 return mask_type(std::isnan(_M_data));
744 else if constexpr (_S_use_bitmask)
745 return _M_isunordered(*
this);
746 else if constexpr (!_Traits._M_support_snan())
747 return !(*
this == *
this);
748 else if (__is_const_known(_M_data))
749 return mask_type([&](
int __i) {
return std::isnan(_M_data[__i]); });
753 using _Ip = __integer_from<
sizeof(value_type)>;
755 < __builtin_bit_cast(rebind_t<_Ip, basic_vec>, _M_fabs());
759 template <_TargetTraits _Traits = {}>
760 [[__gnu__::__always_inline__]]
762 _M_isinf() const requires is_floating_point_v<value_type>
764 if constexpr (_Traits._M_finite_math_only())
765 return mask_type(
false);
766 else if constexpr (_S_is_scalar)
767 return mask_type(std::isinf(_M_data));
768 else if (__is_const_known(_M_data))
769 return mask_type([&](
int __i) {
return std::isinf(_M_data[__i]); });
771 else if constexpr (_S_use_bitmask)
772 return mask_type::_S_init(__x86_bitmask_isinf(_M_data));
773 else if constexpr (_Traits._M_have_avx512dq())
774 return __x86_bit_to_vecmask<typename mask_type::_DataType>(
775 __x86_bitmask_isinf(_M_data));
779 using _Ip = __integer_from<
sizeof(value_type)>;
780 return __vec_bit_cast<_Ip>(_M_fabs()._M_data)
785 [[__gnu__::__always_inline__]]
787 _M_abs() const requires signed_integral<value_type>
788 {
return _M_data < 0 ? -_M_data : _M_data; }
790 [[__gnu__::__always_inline__]]
792 _M_fabs() const requires floating_point<value_type>
794 if constexpr (_S_is_scalar)
797 return __vec_and(__vec_not(_S_signmask<_DataType>), _M_data);
800 template <_TargetTraits _Traits = {}>
801 [[__gnu__::__always_inline__]]
803 _M_isunordered(basic_vec __y)
const requires is_floating_point_v<value_type>
805 if constexpr (_Traits._M_finite_math_only())
806 return mask_type(
false);
807 else if constexpr (_S_is_scalar)
808 return mask_type(std::isunordered(_M_data, __y._M_data));
810 else if constexpr (_S_use_bitmask)
811 return _M_bitmask_cmp<_X86Cmp::_Unord>(__y._M_data);
814 return mask_type([&](
int __i) {
815 return std::isunordered(_M_data[__i], __y._M_data[__i]);
826 template <
typename _Up, _ArchTraits _Traits = {}>
827 static inline basic_vec
828 _S_partial_load(
const _Up* __mem,
size_t __n)
830 if constexpr (_S_is_scalar)
831 return __n == 0 ? basic_vec() : basic_vec(
static_cast<value_type
>(*__mem));
832 else if (__is_const_known_equal_to(__n >=
size_t(_S_size),
true))
833 return basic_vec(_LoadCtorTag(), __mem);
834 else if constexpr (!__converts_trivially<_Up, value_type>)
835 return static_cast<basic_vec
>(rebind_t<_Up, basic_vec>::_S_partial_load(__mem, __n));
839 if constexpr (_Traits._M_have_avx512f()
840 || (_Traits._M_have_avx() &&
sizeof(_Up) >= 4))
842 const auto __k = __n < _S_size ? mask_type::_S_partial_mask_of_n(
int(__n))
844 return _S_masked_load(__mem, mask_type::_S_partial_mask_of_n(
int(__n)));
847 if (__n >=
size_t(_S_size)) [[unlikely]]
848 return basic_vec(_LoadCtorTag(), __mem);
851 else if (__is_const_known_equal_to(
853 return __select_impl(mask_type::_S_partial_mask_of_n(
int(__n)),
854 basic_vec(_LoadCtorTag(), __mem), basic_vec());
856 else if constexpr (_S_size > 4)
858 alignas(_DataType)
byte __dst[
sizeof(_DataType)] = {};
859 const byte* __src =
reinterpret_cast<const byte*
>(__mem);
860 __memcpy_chunks<sizeof(_Up), sizeof(_DataType)>(__dst, __src, __n);
861 return __builtin_bit_cast(_DataType, __dst);
863 else if (__n == 0) [[unlikely]]
865 else if constexpr (_S_size == 2)
866 return _DataType {
static_cast<value_type
>(__mem[0]), 0};
869 constexpr auto [...__is] = _IotaArray<_S_size - 2>;
871 static_cast<value_type
>(__mem[0]),
872 static_cast<value_type
>(__is + 1 < __n ? __mem[__is + 1] : 0)...
890 template <
typename _Up, _ArchTraits _Traits = {}>
891 static inline basic_vec
892 _S_masked_load(
const _Up* __mem, mask_type __k)
894 if constexpr (_S_size == 1)
895 return __k[0] ?
static_cast<value_type
>(__mem[0]) : value_type();
897 else if constexpr (_Traits._M_have_avx512f())
898 return __x86_masked_load<_DataType>(__mem, __k._M_data);
899 else if constexpr (_Traits._M_have_avx() && (
sizeof(_Up) == 4 ||
sizeof(_Up) == 8))
901 if constexpr (__converts_trivially<_Up, value_type>)
902 return __x86_masked_load<_DataType>(__mem, __k._M_data);
905 using _UV = rebind_t<_Up, basic_vec>;
906 return basic_vec(_UV::_S_masked_load(__mem,
typename _UV::mask_type(__k)));
910 else if (__k._M_none_of()) [[unlikely]]
912 else if constexpr (_S_is_scalar)
913 return basic_vec(
static_cast<value_type
>(*__mem));
917 _Bitmask<_S_size < 32 ? 32 : _S_size> __bits = __k._M_to_uint();
918 [[assume(__bits != 0)]];
919 if constexpr (__converts_trivially<_Up, value_type>)
922 __bit_foreach(__bits, [&] [[__gnu__::__always_inline__]] (
int __i) {
923 __r[__i] = __mem[__i];
929 using _UV = rebind_t<_Up, basic_vec>;
930 alignas(_UV) _Up __tmp[
sizeof(_UV) /
sizeof(_Up)] = {};
931 __bit_foreach(__bits, [&] [[__gnu__::__always_inline__]] (
int __i) {
932 __tmp[__i] = __mem[__i];
934 return basic_vec(__builtin_bit_cast(_UV, __tmp));
939 template <
typename _Up>
940 [[__gnu__::__always_inline__]]
942 _M_store(_Up* __mem)
const
944 if constexpr (__converts_trivially<value_type, _Up>)
945 __builtin_memcpy(__mem, &_M_data,
sizeof(_Up) * _S_size);
947 rebind_t<_Up, basic_vec>(*this)._M_store(__mem);
959 template <
typename _Up, _ArchTraits _Traits = {}>
961 _S_partial_store(
const basic_vec __v, _Up* __mem,
size_t __n)
963 if (__is_const_known_equal_to(__n >= _S_size,
true))
966 else if constexpr (_Traits._M_have_avx512f() && !_S_is_scalar)
968 const auto __k = __n < _S_size ? mask_type::_S_partial_mask_of_n(
int(__n))
970 return _S_masked_store(__v, __mem, __k);
973 else if (__n >= _S_size) [[unlikely]]
975 else if (__n == 0) [[unlikely]]
977 else if constexpr (__converts_trivially<value_type, _Up>)
979 byte* __dst =
reinterpret_cast<byte*
>(__mem);
980 const byte* __src =
reinterpret_cast<const byte*
>(&__v._M_data);
981 __memcpy_chunks<sizeof(_Up), sizeof(_M_data)>(__dst, __src, __n);
985 using _UV = rebind_t<_Up, basic_vec>;
986 _UV::_S_partial_store(_UV(__v), __mem, __n);
1003 template <
typename _Up, _ArchTraits _Traits = {}>
1006 _S_masked_store(
const basic_vec __v, _Up* __mem,
const mask_type __k)
1009 if constexpr (_Traits._M_have_avx512f())
1011 __x86_masked_store(__v._M_data, __mem, __k._M_data);
1014 else if constexpr (_Traits._M_have_avx() && (
sizeof(_Up) == 4 ||
sizeof(_Up) == 8))
1016 if constexpr (__converts_trivially<value_type, _Up>)
1017 __x86_masked_store(__v._M_data, __mem, __k._M_data);
1020 using _UV = rebind_t<_Up, basic_vec>;
1021 _UV::_S_masked_store(_UV(__v), __mem,
typename _UV::mask_type(__k));
1026 if (__k._M_none_of()) [[unlikely]]
1028 else if constexpr (_S_is_scalar)
1029 __mem[0] = __v._M_data;
1033 _Bitmask<_S_size < 32 ? 32 : _S_size> __bits = __k._M_to_uint();
1034 [[assume(__bits != 0)]];
1035 if constexpr (__converts_trivially<value_type, _Up>)
1037 __bit_foreach(__bits, [&] [[__gnu__::__always_inline__]] (
int __i) {
1038 __mem[__i] = __v[__i];
1043 const rebind_t<_Up, basic_vec> __cvted(__v);
1044 __bit_foreach(__bits, [&] [[__gnu__::__always_inline__]] (
int __i) {
1045 __mem[__i] = __cvted[__i];
1052 basic_vec() =
default;
1055 using _NativeVecType =
decltype([] {
1056 if constexpr (_S_is_scalar)
1057 return __vec_builtin_type<__canon_value_type, 1>();
1074 basic_vec(_NativeVecType __x)
1075 : _M_data([&] [[__gnu__::__always_inline__]] {
1076 if constexpr (_S_is_scalar)
1093 operator _NativeVecType()
const
1095 if constexpr (_S_is_scalar)
1096 return _NativeVecType{_M_data};
1105 template <__vec_builtin _IV>
1106 requires same_as<__x86_intel_intrin_value_type<value_type>, __vec_value_type<_IV>>
1107 && (
sizeof(_IV) ==
sizeof(_DataType) &&
sizeof(_IV) >= 16
1108 && !is_same_v<_IV, _DataType>)
1111 : _M_data(reinterpret_cast<_DataType>(__x))
1117 template <__vec_builtin _IV>
1118 requires same_as<__x86_intel_intrin_value_type<value_type>, __vec_value_type<_IV>>
1119 && (
sizeof(_IV) ==
sizeof(_DataType) &&
sizeof(_IV) >= 16
1120 && !is_same_v<_IV, _DataType>)
1122 operator _IV()
const
1123 {
return reinterpret_cast<_IV
>(_M_data); }
1138 template <__explicitly_convertible_to<value_type> _Up>
1139 [[__gnu__::__always_inline__]]
1140 constexpr explicit(!__broadcast_constructible<_Up, value_type>)
1141 basic_vec(_Up&& __x) noexcept
1142 : _M_data(_DataType() == _DataType() ?
static_cast<value_type
>(__x) : value_type())
1145 template <__simd_vec_bcast_consteval<value_type> _Up>
1147 basic_vec(_Up&& __x)
1148 : _M_data(_DataType() == _DataType()
1149 ? __value_preserving_cast<value_type>(__x) : value_type())
1153 template <
typename _Up,
typename _UAbi, _TargetTraits _Traits = {}>
1154 requires (_S_size == _UAbi::_S_size)
1155 && __explicitly_convertible_to<_Up, value_type>
1156 [[__gnu__::__always_inline__]]
1158 explicit(!__value_preserving_convertible_to<_Up, value_type>
1159 || __higher_rank_than<_Up, value_type>)
1160 basic_vec(
const basic_vec<_Up, _UAbi>& __x) noexcept
1161 : _M_data([&] [[__gnu__::__always_inline__]] {
1162 if constexpr (_S_is_scalar)
1163 return static_cast<value_type
>(__x[0]);
1164 else if constexpr (_UAbi::_S_nreg >= 2)
1168 return _S_concat(resize_t<__x._N0, basic_vec>(__x._M_data0),
1169 resize_t<__x._N1, basic_vec>(__x._M_data1))._M_data;
1171 return __vec_cast<_DataType>(__x._M_concat_data());
1175 using _VecBase<_Tp, _Ap>::_VecBase;
1178 template <__simd_generator_invokable<value_type, _S_size> _Fp>
1179 [[__gnu__::__always_inline__]]
1181 basic_vec(_Fp&& __gen)
1182 : _M_data([&] [[__gnu__::__always_inline__]] {
1183 constexpr auto [...__is] = _IotaArray<_S_size>;
1184 return _DataType{
static_cast<value_type
>(__gen(__simd_size_c<__is>))...};
1189 template <
typename _Up>
1190 [[__gnu__::__always_inline__]]
1192 basic_vec(_LoadCtorTag,
const _Up* __ptr)
1195 if constexpr (_S_is_scalar)
1196 _M_data =
static_cast<value_type
>(__ptr[0]);
1199 constexpr auto [...__is] = _IotaArray<_S_size>;
1200 _M_data = _DataType{
static_cast<value_type
>(__ptr[__is])...};
1204 if constexpr (__converts_trivially<_Up, value_type>)
1206 __builtin_memcpy(&_M_data, __ptr,
sizeof(value_type) * _S_size);
1209 __vec_builtin_type<_Up, _S_full_size> __tmp = {};
1210 __builtin_memcpy(&__tmp, __ptr,
sizeof(_Up) * _S_size);
1211 _M_data = __vec_cast<_DataType>(__tmp);
1216 template <ranges::contiguous_range _Rg,
typename... _Flags>
1217 requires __static_sized_range<_Rg, _S_size>
1218 && __vectorizable<ranges::range_value_t<_Rg>>
1219 && __explicitly_convertible_to<ranges::range_value_t<_Rg>, value_type>
1220 [[__gnu__::__always_inline__]]
1222 basic_vec(_Rg&& __range, flags<_Flags...> __flags = {})
1223 : basic_vec(_LoadCtorTag(), __flags.template _S_adjust_pointer<basic_vec>(
1224 ranges::
data(__range)))
1226 static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>, value_type,
1236 [[__gnu__::__always_inline__]]
1237 constexpr value_type
1238 operator[](__simd_size_type __i)
const
1240 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size,
"subscript is out of bounds");
1241 if constexpr (_S_is_scalar)
1244 return _M_data[__i];
1250 [[__gnu__::__always_inline__]]
1251 constexpr basic_vec&
1252 operator++() noexcept requires requires(value_type __a) { ++__a; }
1253 {
return *
this += value_type(1); }
1255 [[__gnu__::__always_inline__]]
1257 operator++(
int)
noexcept requires requires(value_type __a) { __a++; }
1259 basic_vec __r = *
this;
1260 *
this += value_type(1);
1264 [[__gnu__::__always_inline__]]
1265 constexpr basic_vec&
1266 operator--() noexcept requires requires(value_type __a) { --__a; }
1267 {
return *
this -= value_type(1); }
1269 [[__gnu__::__always_inline__]]
1271 operator--(
int)
noexcept requires requires(value_type __a) { __a--; }
1273 basic_vec __r = *
this;
1274 *
this -= value_type(1);
1278 [[__gnu__::__always_inline__]]
1280 operator!() const noexcept requires requires(value_type __a) { !__a; }
1281 {
return *
this == value_type(); }
1288 [[__gnu__::__always_inline__]]
1290 operator+() const noexcept requires requires(value_type __a) { +__a; }
1298 [[__gnu__::__always_inline__]]
1300 operator-() const noexcept requires requires(value_type __a) { -__a; }
1301 {
return _S_init(-_M_data); }
1308 [[__gnu__::__always_inline__]]
1310 operator~() const noexcept requires requires(value_type __a) { ~__a; }
1311 {
return _S_init(~_M_data); }
1319 [[__gnu__::__always_inline__]]
1320 friend constexpr basic_vec&
1321 operator&=(basic_vec& __x,
const basic_vec& __y)
noexcept
1322 requires requires(value_type __a) { __a & __a; }
1324 __x._M_data &= __y._M_data;
1333 [[__gnu__::__always_inline__]]
1334 friend constexpr basic_vec&
1335 operator|=(basic_vec& __x,
const basic_vec& __y)
noexcept
1336 requires requires(value_type __a) { __a | __a; }
1338 __x._M_data |= __y._M_data;
1347 [[__gnu__::__always_inline__]]
1348 friend constexpr basic_vec&
1349 operator^=(basic_vec& __x,
const basic_vec& __y)
noexcept
1350 requires requires(value_type __a) { __a ^ __a; }
1352 __x._M_data ^= __y._M_data;
1365 [[__gnu__::__always_inline__]]
1366 friend constexpr basic_vec&
1367 operator+=(basic_vec& __x,
const basic_vec& __y)
noexcept
1368 requires requires(value_type __a) { __a + __a; }
1370 if constexpr (_S_is_partial && is_integral_v<value_type> && is_signed_v<value_type>)
1385 using _UV =
typename _Ap::template _DataType<make_unsigned_t<value_type>>;
1386 const _DataType __result
1387 =
reinterpret_cast<_DataType
>(
reinterpret_cast<_UV
>(__x._M_data)
1388 +
reinterpret_cast<_UV
>(__y._M_data));
1389 const auto __positive = __y > value_type();
1390 const auto __overflow = __positive != (__result > __x);
1391 if (__overflow._M_any_of())
1392 __builtin_unreachable();
1393 __x._M_data = __result;
1395 else if constexpr (_TargetTraits()._M_eval_as_f32<value_type>())
1396 __x = basic_vec(rebind_t<float, basic_vec>(__x) + __y);
1398 __x._M_data += __y._M_data;
1404 [[__gnu__::__always_inline__]]
1405 friend constexpr basic_vec&
1406 operator-=(basic_vec& __x,
const basic_vec& __y)
noexcept
1407 requires requires(value_type __a) { __a - __a; }
1409 if constexpr (_S_is_partial && is_integral_v<value_type> && is_signed_v<value_type>)
1411 using _UV =
typename _Ap::template _DataType<make_unsigned_t<value_type>>;
1412 const _DataType __result
1413 =
reinterpret_cast<_DataType
>(
reinterpret_cast<_UV
>(__x._M_data)
1414 -
reinterpret_cast<_UV
>(__y._M_data));
1415 const auto __positive = __y > value_type();
1416 const auto __overflow = __positive != (__result < __x);
1417 if (__overflow._M_any_of())
1418 __builtin_unreachable();
1419 __x._M_data = __result;
1421 else if constexpr (_TargetTraits()._M_eval_as_f32<value_type>())
1422 __x = basic_vec(rebind_t<float, basic_vec>(__x) - __y);
1424 __x._M_data -= __y._M_data;
// Element-wise compound multiplication: __x *= __y.
1430 [[__gnu__::__always_inline__]]
1431 friend constexpr basic_vec&
1432 operator*=(basic_vec& __x,
const basic_vec& __y)
noexcept
1433 requires requires(value_type __a) { __a * __a; }
// Partial signed-integer vectors: first assert (per lane, via
// __builtin_mul_overflow_p + unreachable) that no active lane overflows,
// then multiply in the unsigned representation to avoid UB on padding lanes.
1435 if constexpr (_S_is_partial && is_integral_v<value_type> && is_signed_v<value_type>)
1437 for (
int __i = 0; __i < _S_size; ++__i)
1439 if (__builtin_mul_overflow_p(__x._M_data[__i], __y._M_data[__i], value_type()))
1440 __builtin_unreachable();
1442 using _UV =
typename _Ap::template _DataType<make_unsigned_t<value_type>>;
1443 __x._M_data =
reinterpret_cast<_DataType
>(
reinterpret_cast<_UV
>(__x._M_data)
1444 *
reinterpret_cast<_UV
>(__y._M_data));
// Scalar unsigned types that promote to a signed type (e.g. unsigned short
// -> int): multiply as unsigned to avoid signed-overflow UB from promotion.
1450 else if constexpr (_S_is_scalar && is_unsigned_v<value_type>
1451 && is_signed_v<
decltype(value_type() * value_type())>)
1452 __x._M_data =
unsigned(__x._M_data) * unsigned(__y._M_data);
1454 else if constexpr (_TargetTraits()._M_eval_as_f32<value_type>())
1455 __x = basic_vec(rebind_t<float, basic_vec>(__x) * __y);
1458 __x._M_data *= __y._M_data;
// Element-wise compound division: __x /= __y.
// Several strategies, chosen in order: constant folding, integer-via-float
// division (when value-preserving), f32 evaluation targets, then the plain
// builtin division with padding lanes of a partial vector forced to 1 so
// they cannot divide by zero.
1462 template <_TargetTraits _Traits = {}>
1463 [[__gnu__::__always_inline__]]
1464 friend constexpr basic_vec&
1465 operator/=(basic_vec& __x,
const basic_vec& __y)
noexcept
1466 requires requires(value_type __a) { __a / __a; }
// If the quotient is a compile-time constant, just use it.
1468 const basic_vec __result([&](
int __i) -> value_type {
return __x[__i] / __y[__i]; });
1469 if (__is_const_known(__result))
1471 return __x = __result;
// Integer division via floating point can be faster on targets without an
// integer vector divide — only when the conversion preserves every value.
1479 if constexpr (is_integral_v<value_type> && _S_size > 2
1480 && __value_preserving_convertible_to<value_type, double>)
1484 if (!__is_const_known(__y))
1487 if constexpr (_Traits._M_have_avx512fp16()
1488 && __value_preserving_convertible_to<value_type, _Float16>)
1489 return __x = basic_vec(rebind_t<_Float16, basic_vec>(__x) / __y);
1490 else if constexpr (__value_preserving_convertible_to<value_type, float>)
1491 return __x = basic_vec(rebind_t<float, basic_vec>(__x) / __y);
1493 return __x = basic_vec(rebind_t<double, basic_vec>(__x) / __y);
1497 if constexpr (_Traits._M_eval_as_f32<value_type>())
1498 return __x = basic_vec(rebind_t<float, basic_vec>(__x) / __y);
// Partial vectors: either divide lane-by-lane (integers) or substitute 1
// into the inactive lanes so the full-width builtin divide is safe.
1500 basic_vec __y1 = __y;
1501 if constexpr (_S_is_partial)
1503 if constexpr (is_integral_v<value_type>)
1507 for (
int __i = 0; __i < _S_size; ++__i)
1508 __x._M_data[__i] /= __y._M_data[__i];
1512 __y1 = __select_impl(mask_type::_S_init(mask_type::_S_implicit_mask),
1513 __y, basic_vec(value_type(1)));
1515 __x._M_data /= __y1._M_data;
// Element-wise compound remainder: __x %= __y (integral value_type only).
1519 [[__gnu__::__always_inline__]]
1520 friend constexpr basic_vec&
1521 operator%=(basic_vec& __x,
const basic_vec& __y)
noexcept
1522 requires requires(value_type __a) { __a % __a; }
1524 static_assert(is_integral_v<value_type>);
// Partial vectors: make inactive lanes 1 (so __y1 never divides by zero in
// padding); if that still isn't compile-time known, fall back to a scalar
// per-lane loop over the active lanes.
1525 if constexpr (_S_is_partial)
1527 const basic_vec __y1 = __select_impl(mask_type::_S_init(mask_type::_S_implicit_mask),
1528 __y, basic_vec(value_type(1)));
1529 if (__is_const_known(__y1))
1530 __x._M_data %= __y1._M_data;
1535 for (
int __i = 0; __i < _S_size; ++__i)
1536 __x._M_data[__i] %= __y._M_data[__i];
1540 __x._M_data %= __y._M_data;
// Element-wise left shift by a vector of shift counts.
// Preconditions reject negative and too-large shifts, both UB for <<.
1544 [[__gnu__::__always_inline__]]
1545 friend constexpr basic_vec&
1546 operator<<=(basic_vec& __x,
const basic_vec& __y) _GLIBCXX_SIMD_NOEXCEPT
1547 requires requires(value_type __a) { __a << __a; }
1549 __glibcxx_simd_precondition(is_unsigned_v<value_type> || all_of(__y >= value_type()),
1550 "negative shift is undefined behavior");
1551 __glibcxx_simd_precondition(all_of(__y < __max_shift<value_type>),
1552 "too large shift invokes undefined behavior");
1553 __x._M_data <<= __y._M_data;
// Element-wise right shift by a vector of shift counts; same UB
// preconditions as operator<<= above.
1557 [[__gnu__::__always_inline__]]
1558 friend constexpr basic_vec&
1559 operator>>=(basic_vec& __x,
const basic_vec& __y) _GLIBCXX_SIMD_NOEXCEPT
1560 requires requires(value_type __a) { __a >> __a; }
1562 __glibcxx_simd_precondition(is_unsigned_v<value_type> || all_of(__y >= value_type()),
1563 "negative shift is undefined behavior");
1564 __glibcxx_simd_precondition(all_of(__y < __max_shift<value_type>),
1565 "too large shift invokes undefined behavior");
1566 __x._M_data >>= __y._M_data;
// Left shift of every element by the same scalar count.
1570 [[__gnu__::__always_inline__]]
1571 friend constexpr basic_vec&
1572 operator<<=(basic_vec& __x, __simd_size_type __y) _GLIBCXX_SIMD_NOEXCEPT
1573 requires requires(value_type __a, __simd_size_type __b) { __a << __b; }
1575 __glibcxx_simd_precondition(__y >= 0,
"negative shift is undefined behavior");
1576 __glibcxx_simd_precondition(__y <
int(__max_shift<value_type>),
1577 "too large shift invokes undefined behavior");
1578 __x._M_data <<= __y;
// Right shift of every element by the same scalar count.
1582 [[__gnu__::__always_inline__]]
1583 friend constexpr basic_vec&
1584 operator>>=(basic_vec& __x, __simd_size_type __y) _GLIBCXX_SIMD_NOEXCEPT
1585 requires requires(value_type __a, __simd_size_type __b) { __a >> __b; }
1587 __glibcxx_simd_precondition(__y >= 0,
"negative shift is undefined behavior");
1588 __glibcxx_simd_precondition(__y <
int(__max_shift<value_type>),
1589 "too large shift invokes undefined behavior");
1590 __x._M_data >>= __y;
// Compare *this against __y producing a bitmask-based mask_type, using the
// x86 compare-to-mask instructions selected by _Cmp.  When both operands are
// compile-time constants the comparison is folded via a constexpr fold
// expression instead.
1596 template <_X86Cmp _Cmp>
1597 [[__gnu__::__always_inline__]]
1599 _M_bitmask_cmp(_DataType __y)
const
1601 static_assert(_S_use_bitmask);
1602 if (__is_const_known(_M_data, __y))
// Structured-binding pack of indices 0.._S_size-1 (C++26 P1061 style).
1604 constexpr auto [...__is] = _IotaArray<_S_size>;
// Map the _X86Cmp enumerator onto the equivalent scalar comparison.
// NOTE(review): the return expressions for _Eq/_Lt/_Le/_Neq are on lines
// missing from this extracted listing.
1605 constexpr auto __cmp_op = [] [[__gnu__::__always_inline__]]
1606 (value_type __a, value_type __b) {
1607 if constexpr (_Cmp == _X86Cmp::_Eq)
1609 else if constexpr (_Cmp == _X86Cmp::_Lt)
1611 else if constexpr (_Cmp == _X86Cmp::_Le)
1613 else if constexpr (_Cmp == _X86Cmp::_Unord)
1614 return std::isunordered(__a, __b);
1615 else if constexpr (_Cmp == _X86Cmp::_Neq)
1617 else if constexpr (_Cmp == _X86Cmp::_Nlt)
1618 return !(__a < __b);
1619 else if constexpr (_Cmp == _X86Cmp::_Nle)
1620 return !(__a <= __b);
1622 static_assert(
false);
// Fold the per-lane results into one bitmask word (bit i = lane i).
1624 const _Bitmask<_S_size> __bits
1625 = ((__cmp_op(__vec_get(_M_data, __is), __vec_get(__y, __is))
1626 ? (1ULL << __is) : 0) | ...);
1627 return mask_type::_S_init(__bits);
1630 return mask_type::_S_init(__x86_bitmask_cmp<_Cmp>(_M_data, __y));
// Element-wise equality; bitmask targets dispatch to _M_bitmask_cmp.
1634 [[__gnu__::__always_inline__]]
1635 friend constexpr mask_type
1636 operator==(
const basic_vec& __x,
const basic_vec& __y)
noexcept
1639 if constexpr (_S_use_bitmask)
1640 return __x._M_bitmask_cmp<_X86Cmp::_Eq>(__y._M_data);
1643 return mask_type::_S_init(__x._M_data == __y._M_data);
// Element-wise inequality; bitmask targets dispatch to _M_bitmask_cmp.
1646 [[__gnu__::__always_inline__]]
1647 friend constexpr mask_type
1648 operator!=(
const basic_vec& __x,
const basic_vec& __y)
noexcept
1651 if constexpr (_S_use_bitmask)
1652 return __x._M_bitmask_cmp<_X86Cmp::_Neq>(__y._M_data);
1655 return mask_type::_S_init(__x._M_data != __y._M_data);
// Element-wise less-than; bitmask targets dispatch to _M_bitmask_cmp.
1658 [[__gnu__::__always_inline__]]
1659 friend constexpr mask_type
1660 operator<(
const basic_vec& __x,
const basic_vec& __y)
noexcept
1663 if constexpr (_S_use_bitmask)
1664 return __x._M_bitmask_cmp<_X86Cmp::_Lt>(__y._M_data);
1667 return mask_type::_S_init(__x._M_data < __y._M_data);
// Element-wise less-or-equal; bitmask targets dispatch to _M_bitmask_cmp.
1670 [[__gnu__::__always_inline__]]
1671 friend constexpr mask_type
1672 operator<=(
const basic_vec& __x,
const basic_vec& __y)
noexcept
1675 if constexpr (_S_use_bitmask)
1676 return __x._M_bitmask_cmp<_X86Cmp::_Le>(__y._M_data);
1679 return mask_type::_S_init(__x._M_data <= __y._M_data);
// Element-wise greater-than, expressed via the swapped less-than.
1682 [[__gnu__::__always_inline__]]
1683 friend constexpr mask_type
1684 operator>(
const basic_vec& __x,
const basic_vec& __y)
noexcept
1685 {
return __y < __x; }
// Element-wise greater-or-equal, expressed via the swapped less-or-equal.
1687 [[__gnu__::__always_inline__]]
1688 friend constexpr mask_type
1689 operator>=(
const basic_vec& __x,
const basic_vec& __y)
noexcept
1690 {
return __y <= __x; }
// Lane-wise blend: for each lane, __t where __k is true, else __f.
// NOTE(review): extracted listing — some enclosing braces/else lines are
// missing here, so the exact branch nesting below is partly inferred.
1693 template <_TargetTraits _Traits = {}>
1694 [[__gnu__::__always_inline__]]
1695 friend constexpr basic_vec
1696 __select_impl(
const mask_type& __k,
const basic_vec& __t,
const basic_vec& __f)
noexcept
1698 if constexpr (_S_size == 1)
1699 return __k[0] ? __t : __f;
1700 else if constexpr (_S_use_bitmask)
// Constant operands: fold per lane; otherwise use the x86 mask blend.
1703 if (__is_const_known(__k, __t, __f))
1704 return basic_vec([&](
int __i) {
return __k[__i] ? __t[__i] : __f[__i]; });
1706 return __x86_bitmask_blend(__k._M_data, __t._M_data, __f._M_data);
1708 static_assert(
false,
"TODO");
1713 return __k._M_data ? __t._M_data : __f._M_data;
// Strength-reduction special cases when one side is known to be all zero:
// blending with 0 degenerates into a bitwise AND with the mask, and a 0/1
// blend of integers becomes abs of the sign-extended mask.
1717 constexpr bool __uses_simd_register =
sizeof(_M_data) >= 8;
1718 using _VO = _VecOps<_DataType>;
1719 if (_VO::_S_is_const_known_equal_to(__f._M_data, 0))
1721 if (is_integral_v<value_type> && __uses_simd_register
1722 && _VO::_S_is_const_known_equal_to(__t._M_data, 1))
1727 return basic_vec((-__k)._M_abs());
1729 return __vec_and(
reinterpret_cast<_DataType
>(__k._M_data), __t._M_data);
1731 else if (_VecOps<_DataType>::_S_is_const_known_equal_to(__t._M_data, 0))
1733 if (is_integral_v<value_type> && __uses_simd_register
1734 && _VO::_S_is_const_known_equal_to(__f._M_data, 1))
1735 return value_type(1) + basic_vec(-__k);
1737 return __vec_and(
reinterpret_cast<_DataType
>(__vec_not(__k._M_data)), __f._M_data);
// Vector masks store true as all-ones (negative when viewed signed), hence
// the `< 0` form of the generic blends below.
1746 return __k._M_data < 0 ? __t._M_data : __f._M_data;
1748 return __k._M_data ? __t._M_data : __f._M_data;
// Partial specialization of basic_vec for ABIs spanning more than one
// register (_S_nreg > 1): the vector is split into two halves, _M_data0 of
// _N0 = bit_ceil(size)/2 elements and _M_data1 with the remaining _N1, and
// every operation recurses into the halves.
1754 template <__vectorizable _Tp, __abi_tag _Ap>
1755 requires (_Ap::_S_nreg > 1)
1756 class basic_vec<_Tp, _Ap>
1757 : public _VecBase<_Tp, _Ap>
1759 template <
typename,
typename>
1760 friend class basic_vec;
1762 template <
size_t,
typename>
1763 friend class basic_mask;
1765 static constexpr int _S_size = _Ap::_S_size;
// First half is the largest power-of-two half; second half takes the rest.
1767 static constexpr int _N0 = __bit_ceil(
unsigned(_S_size)) / 2;
1769 static constexpr int _N1 = _S_size - _N0;
1771 using _DataType0 = __similar_vec<_Tp, _N0, _Ap>;
1774 static_assert(_N0 *
sizeof(_Tp) ==
sizeof(_DataType0));
1776 using _DataType1 = __similar_vec<_Tp, _N1, _Ap>;
1778 static_assert(_DataType0::abi_type::_S_nreg + _DataType1::abi_type::_S_nreg == _Ap::_S_nreg);
1780 static constexpr bool _S_is_scalar = _DataType0::_S_is_scalar;
// The two register halves.
1782 _DataType0 _M_data0;
1784 _DataType1 _M_data1;
1786 static constexpr bool _S_use_bitmask = _Ap::_S_is_bitmask;
// Only the tail half can be partial (not fully populated).
1788 static constexpr bool _S_is_partial = _DataType1::_S_is_partial;
1791 using value_type = _Tp;
1793 using mask_type = _VecBase<_Tp, _Ap>::mask_type;
// Construct a basic_vec from its two halves (body elided in this extracted
// listing), plus read-only accessors for each half.
1795 [[__gnu__::__always_inline__]]
1796 static constexpr basic_vec
1797 _S_init(
const _DataType0& __x,
const _DataType1& __y)
1805 [[__gnu__::__always_inline__]]
1806 constexpr const _DataType0&
1808 {
return _M_data0; }
1810 [[__gnu__::__always_inline__]]
1811 constexpr const _DataType1&
1813 {
return _M_data1; }
// True iff both halves are compile-time constants.
1815 [[__gnu__::__always_inline__]]
1816 friend constexpr bool
1817 __is_const_known(
const basic_vec& __x)
1818 {
return __is_const_known(__x._M_data0) && __is_const_known(__x._M_data1); }
// Concatenate the two halves into one builtin vector, zero-padding the tail
// half up to the size of the first so __vec_concat sees equal widths.
1820 [[__gnu__::__always_inline__]]
1822 _M_concat_data([[maybe_unused]]
bool __do_sanitize =
false)
const
1824 return __vec_concat(_M_data0._M_concat_data(
false),
1825 __vec_zero_pad_to<
sizeof(_M_data0)>(
1826 _M_data1._M_concat_data(__do_sanitize)));
// Build a permutation of __x by delegating to each half, offsetting the
// index map by _N0 for the second half.
1829 template <
int _Size = _S_size,
int _Offset = 0,
typename _A0,
typename _Fp>
1830 [[__gnu__::__always_inline__]]
1831 static constexpr basic_vec
1832 _S_static_permute(
const basic_vec<value_type, _A0>& __x, _Fp&& __idxmap)
1835 _DataType0::template _S_static_permute<_Size, _Offset>(__x, __idxmap),
1836 _DataType1::template _S_static_permute<_Size, _Offset + _N0>(__x, __idxmap));
// Split *this into chunks of type _Vp: an array when the size divides
// evenly, otherwise a tuple whose last element is the resized remainder.
1839 template <
typename _Vp>
1840 [[__gnu__::__always_inline__]]
1842 _M_chunk() const noexcept
1844 constexpr int __n = _S_size / _Vp::_S_size;
1845 constexpr int __rem = _S_size % _Vp::_S_size;
1846 constexpr auto [...__is] = _IotaArray<__n>;
1847 if constexpr (__rem == 0)
1848 return array<_Vp, __n>{__extract_simd_at<_Vp>(cw<_Vp::_S_size * __is>,
1849 _M_data0, _M_data1)...};
1852 using _Rest = resize_t<__rem, _Vp>;
1853 return tuple(__extract_simd_at<_Vp>(cw<_Vp::_S_size * __is>, _M_data0, _M_data1)...,
1854 __extract_simd_at<_Rest>(cw<_Vp::_S_size * __n>, _M_data0, _M_data1));
// Concatenate vectors into one basic_vec.  The single-argument overload is
// the identity (body elided in this listing); the variadic overload extracts
// each half at its element offset from the argument pack.
1858 [[__gnu__::__always_inline__]]
1859 static constexpr const basic_vec&
1860 _S_concat(
const basic_vec& __x0)
noexcept
1863 template <
typename... _As>
1864 requires (
sizeof...(_As) >= 2)
1865 [[__gnu__::__always_inline__]]
1866 static constexpr basic_vec
1867 _S_concat(
const basic_vec<value_type, _As>&... __xs)
noexcept
1869 static_assert(_S_size == (_As::_S_size + ...));
1870 return _S_init(__extract_simd_at<_DataType0>(cw<0>, __xs...),
1871 __extract_simd_at<_DataType1>(cw<_N0>, __xs...));
// One reduction step: combine the two equal-sized halves with __binary_op.
1874 [[__gnu__::__always_inline__]]
1876 _M_reduce_to_half(
auto __binary_op)
const requires (_N0 == _N1)
1877 {
return __binary_op(_M_data0, _M_data1); }
// Fold __rest into *this, recursively matching sizes: chunk a larger rest,
// combine an equal-sized rest directly, or halve *this when rest is smaller.
1879 [[__gnu__::__always_inline__]]
1880 constexpr value_type
1881 _M_reduce_tail(
const auto& __rest,
auto __binary_op)
const
1883 if constexpr (__rest.size() > _S_size)
1885 auto [__a, __b] = __rest.template _M_chunk<basic_vec>();
1886 return __binary_op(*
this, __a)._M_reduce_tail(__b, __binary_op);
1888 else if constexpr (__rest.size() == _S_size)
1889 return __binary_op(*
this, __rest)._M_reduce(__binary_op);
1891 return _M_reduce_to_half(__binary_op)._M_reduce_tail(__rest, __binary_op);
// Horizontal reduction over all lanes with __binary_op.
1894 template <
typename _BinaryOp, _TargetTraits _Traits = {}>
1895 [[__gnu__::__always_inline__]]
1896 constexpr value_type
1897 _M_reduce(_BinaryOp __binary_op)
const
// f32-evaluation targets: reduce in float for plus/multiplies, then narrow.
1899 if constexpr (_Traits.template _M_eval_as_f32<value_type>()
1900 && (is_same_v<_BinaryOp, plus<>>
1901 || is_same_v<_BinaryOp, multiplies<>>))
1902 return value_type(rebind_t<float, basic_vec>(*this)._M_reduce(__binary_op));
// 8-bit integer multiply-reduction: reinterpret pairs of bytes as u16 and
// combine each element with its high byte, halving the width per step.
1904 else if constexpr (is_integral_v<value_type> &&
sizeof(value_type) == 1
1905 && is_same_v<
decltype(__binary_op), multiplies<>>)
1918 if constexpr (_DataType1::_S_is_scalar)
1919 return __binary_op(_DataType1(_M_data0._M_reduce(__binary_op)), _M_data1)[0];
1921 else if constexpr (_S_size % 2 == 0)
1923 using _V16 = resize_t<_S_size / 2, rebind_t<unsigned short, basic_vec>>;
1924 auto __a = __builtin_bit_cast(_V16, *
this);
1925 return __binary_op(__a, __a >> __CHAR_BIT__)._M_reduce(__binary_op);
1929 using _V16 = rebind_t<unsigned short, basic_vec>;
1930 return _V16(*this)._M_reduce(__binary_op);
// General case: reduce the first half, folding in the second as the tail.
1935 return _M_data0._M_reduce_tail(_M_data1, __binary_op);
// Floating-point classification queries, applied per half and recombined.
1938 [[__gnu__::__always_inline__]]
1940 _M_isnan() const requires is_floating_point_v<value_type>
1941 {
return mask_type::_S_init(_M_data0._M_isnan(), _M_data1._M_isnan()); }
1943 [[__gnu__::__always_inline__]]
1945 _M_isinf() const requires is_floating_point_v<value_type>
1946 {
return mask_type::_S_init(_M_data0._M_isinf(), _M_data1._M_isinf()); }
1948 [[__gnu__::__always_inline__]]
1950 _M_isunordered(basic_vec __y)
const requires is_floating_point_v<value_type>
1952 return mask_type::_S_init(_M_data0._M_isunordered(__y._M_data0),
1953 _M_data1._M_isunordered(__y._M_data1));
// Absolute value, applied per half (integer and floating-point variants).
1956 [[__gnu__::__always_inline__]]
1958 _M_abs() const requires signed_integral<value_type>
1959 {
return _S_init(_M_data0._M_abs(), _M_data1._M_abs()); }
1961 [[__gnu__::__always_inline__]]
1963 _M_fabs() const requires floating_point<value_type>
1964 {
return _S_init(_M_data0._M_fabs(), _M_data1._M_fabs()); }
// Load only the first __n elements from __mem.  The branch structure around
// the two returns is partly elided in this extracted listing; visibly, one
// path full-loads the first half and partial-loads the tail.
1966 template <
typename _Up>
1967 [[__gnu__::__always_inline__]]
1968 static inline basic_vec
1969 _S_partial_load(
const _Up* __mem,
size_t __n)
1972 return _S_init(_DataType0(_LoadCtorTag(), __mem),
1973 _DataType1::_S_partial_load(__mem + _N0, __n - _N0));
1975 return _S_init(_DataType0::_S_partial_load(__mem, __n),
// Masked load: each half loads under its half of the mask.
1979 template <
typename _Up, _ArchTraits _Traits = {}>
1980 static inline basic_vec
1981 _S_masked_load(
const _Up* __mem, mask_type __k)
1983 return _S_init(_DataType0::_S_masked_load(__mem, __k._M_data0),
1984 _DataType1::_S_masked_load(__mem + _N0, __k._M_data1));
// Store all elements to __mem, half by half.
1987 template <
typename _Up>
1988 [[__gnu__::__always_inline__]]
1990 _M_store(_Up* __mem)
const
1992 _M_data0._M_store(__mem);
1993 _M_data1._M_store(__mem + _N0);
// Store only the first __n elements (the __n-vs-_N0 branch line is elided
// in this extracted listing): full first half + partial tail, or a partial
// first half only.
1996 template <
typename _Up>
1997 [[__gnu__::__always_inline__]]
1999 _S_partial_store(
const basic_vec& __v, _Up* __mem,
size_t __n)
2003 __v._M_data0._M_store(__mem);
2004 _DataType1::_S_partial_store(__v._M_data1, __mem + _N0, __n - _N0);
2008 _DataType0::_S_partial_store(__v._M_data0, __mem, __n);
// Masked store: each half stores under its half of the mask.
2012 template <
typename _Up>
2013 [[__gnu__::__always_inline__]]
2015 _S_masked_store(
const basic_vec& __v, _Up* __mem,
const mask_type& __k)
2017 _DataType0::_S_masked_store(__v._M_data0, __mem, __k._M_data0);
2018 _DataType1::_S_masked_store(__v._M_data1, __mem + _N0, __k._M_data1);
// Default constructor, plus conversion to/from the padded native builtin
// vector type (bit_ceil(size) elements): the constructor extracts each half
// from the native vector; the conversion operator concatenates the halves.
2021 basic_vec() =
default;
2024 using _NativeVecType = __vec_builtin_type<value_type, __bit_ceil(
unsigned(_S_size))>;
2026 [[__gnu__::__always_inline__]]
2028 basic_vec(
const _NativeVecType& __x)
2029 : _M_data0(_VecOps<__vec_builtin_type<value_type, _N0>>::_S_extract(__x)),
2030 _M_data1(_VecOps<__vec_builtin_type<value_type, __bit_ceil(unsigned(_N1))>>
2031 ::_S_extract(__x, integral_constant<int, _N0>()))
2034 [[__gnu__::__always_inline__]]
2036 operator _NativeVecType()
const
2037 {
return _M_concat_data(); }
// Broadcast constructors: splat one value into both halves.  The first is
// explicit unless the conversion is broadcast-safe; the second handles
// consteval-checked value-preserving broadcasts.
2040 template <__explicitly_convertible_to<value_type> _Up>
2041 [[__gnu__::__always_inline__]]
2042 constexpr explicit(!__broadcast_constructible<_Up, value_type>)
2043 basic_vec(_Up&& __x) noexcept
2044 : _M_data0(
static_cast<value_type
>(__x)), _M_data1(
static_cast<value_type
>(__x))
2047 template <__simd_vec_bcast_consteval<value_type> _Up>
2049 basic_vec(_Up&& __x)
2050 : _M_data0(__value_preserving_cast<value_type>(__x)),
2051 _M_data1(__value_preserving_cast<value_type>(__x))
// Converting constructor from a same-size vector of a different value type:
// chunk the source at _N0 and convert each half.  Explicit when the
// conversion is narrowing or to a lower-rank type.
2055 template <
typename _Up,
typename _UAbi>
2056 requires (_S_size == _UAbi::_S_size)
2057 && __explicitly_convertible_to<_Up, value_type>
2058 [[__gnu__::__always_inline__]]
2060 explicit(!__value_preserving_convertible_to<_Up, value_type>
2061 || __higher_rank_than<_Up, value_type>)
2062 basic_vec(
const basic_vec<_Up, _UAbi>& __x) noexcept
2063 : _M_data0(get<0>(chunk<_N0>(__x))),
2064 _M_data1(get<1>(chunk<_N0>(__x)))
2067 using _VecBase<_Tp, _Ap>::_VecBase;
// Generator constructor: the first half uses __gen directly; the second
// half shifts the generator's index by _N0.
2070 template <__simd_generator_invokable<value_type, _S_size> _Fp>
2071 [[__gnu__::__always_inline__]]
2073 basic_vec(_Fp&& __gen)
2074 : _M_data0(__gen), _M_data1([&] [[__gnu__::__always_inline__]] (auto __i) {
2075 return __gen(__simd_size_c<__i + _N0>);
// Tagged load constructor: each half loads from its offset in __ptr.
2080 template <
typename _Up>
2081 [[__gnu__::__always_inline__]]
2083 basic_vec(_LoadCtorTag,
const _Up* __ptr)
2084 : _M_data0(_LoadCtorTag(), __ptr),
2085 _M_data1(_LoadCtorTag(), __ptr + _N0)
// Load constructor from a statically-sized contiguous range, with optional
// flags that may adjust the data pointer (e.g. for alignment).
2088 template <ranges::contiguous_range _Rg,
typename... _Flags>
2089 requires __static_sized_range<_Rg, _S_size>
2090 && __vectorizable<ranges::range_value_t<_Rg>>
2091 && __explicitly_convertible_to<ranges::range_value_t<_Rg>, value_type>
2093 basic_vec(_Rg&& __range, flags<_Flags...> __flags = {})
2094 : basic_vec(_LoadCtorTag(),
2095 __flags.template _S_adjust_pointer<basic_vec>(ranges::
data(__range)))
2097 static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>, value_type,
// Element read access.  Compile-time-known indices pick the correct half
// directly; otherwise the object is read through a may_alias pointer, which
// relies on _M_data0/_M_data1 being laid out contiguously.
2102 [[__gnu__::__always_inline__]]
2103 constexpr value_type
2104 operator[](__simd_size_type __i)
const
2106 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size,
"subscript is out of bounds");
2107 if (__is_const_known(__i))
2108 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
2111 using _AliasingT [[__gnu__::__may_alias__]] = value_type;
2112 return reinterpret_cast<const _AliasingT*
>(
this)[__i];
// Pre/post increment and decrement (bodies partly elided in this extracted
// listing; the post-forms visibly copy *this before mutating).
2117 [[__gnu__::__always_inline__]]
2118 constexpr basic_vec&
2119 operator++() noexcept requires requires(value_type __a) { ++__a; }
2126 [[__gnu__::__always_inline__]]
2128 operator++(
int)
noexcept requires requires(value_type __a) { __a++; }
2130 basic_vec __r = *
this;
2136 [[__gnu__::__always_inline__]]
2137 constexpr basic_vec&
2138 operator--() noexcept requires requires(value_type __a) { --__a; }
2145 [[__gnu__::__always_inline__]]
2147 operator--(
int)
noexcept requires requires(value_type __a) { __a--; }
2149 basic_vec __r = *
this;
// Unary operators, each applied per half (operator+ body elided here).
2155 [[__gnu__::__always_inline__]]
2157 operator!() const noexcept requires requires(value_type __a) { !__a; }
2158 {
return mask_type::_S_init(!_M_data0, !_M_data1); }
2160 [[__gnu__::__always_inline__]]
2162 operator+() const noexcept requires requires(value_type __a) { +__a; }
2165 [[__gnu__::__always_inline__]]
2167 operator-() const noexcept requires requires(value_type __a) { -__a; }
2168 {
return _S_init(-_M_data0, -_M_data1); }
2170 [[__gnu__::__always_inline__]]
2172 operator~() const noexcept requires requires(value_type __a) { ~__a; }
2173 {
return _S_init(~_M_data0, ~_M_data1); }
// Macro generating all compound-assignment operators: each forwards the
// operation to both halves independently.
2176#define _GLIBCXX_SIMD_DEFINE_OP(sym) \
2177 [[__gnu__::__always_inline__]] \
2178 friend constexpr basic_vec& \
2179 operator sym##=(basic_vec& __x, const basic_vec& __y) _GLIBCXX_SIMD_NOEXCEPT \
2181 __x._M_data0 sym##= __y._M_data0; \
2182 __x._M_data1 sym##= __y._M_data1; \
2186 _GLIBCXX_SIMD_DEFINE_OP(+)
2187 _GLIBCXX_SIMD_DEFINE_OP(-)
2188 _GLIBCXX_SIMD_DEFINE_OP(*)
2189 _GLIBCXX_SIMD_DEFINE_OP(/)
2190 _GLIBCXX_SIMD_DEFINE_OP(%)
2191 _GLIBCXX_SIMD_DEFINE_OP(&)
2192 _GLIBCXX_SIMD_DEFINE_OP(|)
2193 _GLIBCXX_SIMD_DEFINE_OP(^)
2194 _GLIBCXX_SIMD_DEFINE_OP(<<)
2195 _GLIBCXX_SIMD_DEFINE_OP(>>)
2197#undef _GLIBCXX_SIMD_DEFINE_OP
// Scalar-count shift operators: apply the same count to both halves.
2199 [[__gnu__::__always_inline__]]
2200 friend constexpr basic_vec&
2201 operator<<=(basic_vec& __x, __simd_size_type __y) _GLIBCXX_SIMD_NOEXCEPT
2202 requires requires(value_type __a, __simd_size_type __b) { __a << __b; }
2204 __x._M_data0 <<= __y;
2205 __x._M_data1 <<= __y;
2209 [[__gnu__::__always_inline__]]
2210 friend constexpr basic_vec&
2211 operator>>=(basic_vec& __x, __simd_size_type __y) _GLIBCXX_SIMD_NOEXCEPT
2212 requires requires(value_type __a, __simd_size_type __b) { __a >> __b; }
2214 __x._M_data0 >>= __y;
2215 __x._M_data1 >>= __y;
// Comparison operators for the two-register specialization: compare each
// half and stitch the two half-masks back together.
2220 [[__gnu__::__always_inline__]]
2221 friend constexpr mask_type
2222 operator==(
const basic_vec& __x,
const basic_vec& __y)
noexcept
2223 {
return mask_type::_S_init(__x._M_data0 == __y._M_data0, __x._M_data1 == __y._M_data1); }
2225 [[__gnu__::__always_inline__]]
2226 friend constexpr mask_type
2227 operator!=(
const basic_vec& __x,
const basic_vec& __y)
noexcept
2228 {
return mask_type::_S_init(__x._M_data0 != __y._M_data0, __x._M_data1 != __y._M_data1); }
2230 [[__gnu__::__always_inline__]]
2231 friend constexpr mask_type
2232 operator<(
const basic_vec& __x,
const basic_vec& __y)
noexcept
2233 {
return mask_type::_S_init(__x._M_data0 < __y._M_data0, __x._M_data1 < __y._M_data1); }
2235 [[__gnu__::__always_inline__]]
2236 friend constexpr mask_type
2237 operator<=(
const basic_vec& __x,
const basic_vec& __y)
noexcept
2238 {
return mask_type::_S_init(__x._M_data0 <= __y._M_data0, __x._M_data1 <= __y._M_data1); }
2240 [[__gnu__::__always_inline__]]
2241 friend constexpr mask_type
2242 operator>(
const basic_vec& __x,
const basic_vec& __y)
noexcept
2243 {
return mask_type::_S_init(__x._M_data0 > __y._M_data0, __x._M_data1 > __y._M_data1); }
2245 [[__gnu__::__always_inline__]]
2246 friend constexpr mask_type
2247 operator>=(
const basic_vec& __x,
const basic_vec& __y)
noexcept
2248 {
return mask_type::_S_init(__x._M_data0 >= __y._M_data0, __x._M_data1 >= __y._M_data1); }
// Blend for the two-register specialization: recurse into each half.
2251 [[__gnu__::__always_inline__]]
2252 friend constexpr basic_vec
2253 __select_impl(
const mask_type& __k,
const basic_vec& __t,
const basic_vec& __f)
noexcept
2255 return _S_init(__select_impl(__k._M_data0, __t._M_data0, __f._M_data0),
2256 __select_impl(__k._M_data1, __t._M_data1, __f._M_data1));
// Deduction guide: deduce value type and ABI from a statically-sized
// contiguous range.  NOTE(review): two alternative size expressions appear
// below (ranges::size vs span extent) — the extracted listing seems to have
// merged two revisions of this guide; verify against the real source.
2261 template <ranges::contiguous_range _Rg,
typename... _Ts>
2262 requires __static_sized_range<_Rg>
2263 basic_vec(_Rg&& __r, _Ts...)
2264 -> basic_vec<ranges::range_value_t<_Rg>,
2265 __deduce_abi_t<ranges::range_value_t<_Rg>,
2267 static_cast<__simd_size_type
>(ranges::size(__r))>>;
2269 static_cast<__simd_size_type
>(
decltype(std::span(__r))::extent)>>;
// Deduction guide: a mask converts to a vector of the integer type matching
// the mask's element width, with a rebound ABI of the same size.
2272 template <
size_t _Bytes,
typename _Ap>
2273 basic_vec(basic_mask<_Bytes, _Ap>)
2274 -> basic_vec<__integer_from<_Bytes>,
2275 decltype(__abi_rebind<__integer_from<_Bytes>, basic_mask<_Bytes, _Ap>::size.value,
// __iota: zero for arithmetic scalars; for vectors, a generator-built
// 0,1,2,... sequence (the generator body and its overflow static_assert are
// partly elided in this extracted listing).
2279 template <__vectorizable _Tp>
2280 requires is_arithmetic_v<_Tp>
2281 inline constexpr _Tp
2282 __iota<_Tp> = _Tp();
2284 template <
typename _Tp,
typename _Ap>
2285 inline constexpr basic_vec<_Tp, _Ap>
2286 __iota<basic_vec<_Tp, _Ap>> = basic_vec<_Tp, _Ap>([](_Tp __i) -> _Tp {
2288 "iota object would overflow");
2292_GLIBCXX_END_NAMESPACE_VERSION
2295#pragma GCC diagnostic pop
constexpr bool operator<=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator>=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator<(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator>(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
bool is_sufficiently_aligned(_Tp *__ptr)
Is __ptr aligned to an _Align byte boundary?
ISO C++ entities toplevel namespace is std.
_Tp fabs(const std::complex< _Tp > &__z)
fabs(__z) TR1 8.1.8 [tr.c99.cmplx.fabs]
constexpr auto data(_Container &__cont) noexcept(noexcept(__cont.data())) -> decltype(__cont.data())
Return the data pointer of a container.
static constexpr _Tp max() noexcept
static constexpr _Tp infinity() noexcept