25#ifndef _GLIBCXX_VEC_OPS_H
26#define _GLIBCXX_VEC_OPS_H 1
29#pragma GCC system_header
32#if __cplusplus >= 202400L
34#include "simd_details.h"
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wpsabi"
43namespace std _GLIBCXX_VISIBILITY(default)
45_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Helper constrained to signed integral types; takes its argument by value.
// It is used below as the requirement on the element count of
// __vec_builtin_type.
// NOTE(review): the return-type line and the body are missing from this
// listing; presumably a power-of-two test for signed values - confirm.
48 template <std::
signed_
integral _Tp>
50 __signed_has_single_bit(_Tp __x)
56 template <__vectorizable _Tp,
size_t _Bytes>
57 requires (__has_single_bit(_Bytes))
58 using __vec_builtin_type_bytes [[__gnu__::__vector_size__(_Bytes)]] = _Tp;
63 template <__vectorizable _Tp, __simd_
size_type _W
idth>
64 requires (__signed_has_single_bit(_Width))
65 using __vec_builtin_type = __vec_builtin_type_bytes<_Tp,
sizeof(_Tp) * _Width>;
// Constraint over a candidate vector type _Tp and an element type _ValueType.
// _Width defaults to sizeof(_Tp) / sizeof(_ValueType).  The expression holds
// when all of the following are true:
//  - _Tp is neither a class, a pointer, nor an arithmetic type;
//  - _ValueType satisfies __vectorizable;
//  - _Width is at least 1 and equals sizeof(_Tp) / sizeof(_ValueType);
//  - _Tp is exactly __vec_builtin_type_bytes<_ValueType, sizeof(_Tp)>;
//  - element 0 of a _Tp lvalue can be assigned from a _ValueType.
// NOTE(review): the line that names the entity being defined (listing line
// 72) is missing from this listing.
70 template <
typename _Tp,
typename _ValueType,
71 __simd_size_type _Width =
sizeof(_Tp) /
sizeof(_ValueType)>
73 = !is_class_v<_Tp> && !is_pointer_v<_Tp> && !is_arithmetic_v<_Tp>
74 && __vectorizable<_ValueType>
75 && _Width >= 1 &&
sizeof(_Tp) /
sizeof(_ValueType) == _Width
76 &&
same_as<__vec_builtin_type_bytes<_ValueType,
sizeof(_Tp)>, _Tp>
77 &&
requires(_Tp& __v, _ValueType __x) { __v[0] = __x; };
82 template <
typename _Tp>
89 template <__vec_builtin _Tp>
90 using __vec_value_type = remove_cvref_t<decltype(declval<const _Tp>()[0])>;
95 template <
typename _Tp>
96 inline constexpr __simd_size_type __width_of = 1;
// Specialized form of __width_of: the element count is the size of _Tp
// divided by the size of its element type (__vec_value_type<_Tp>).
// NOTE(review): listing line 99, between the template head and the variable,
// is missing; presumably it restricts _Tp to vector builtins - confirm.
98 template <
typename _Tp>
100 inline constexpr __simd_size_type __width_of<_Tp> =
sizeof(_Tp) /
sizeof(__vec_value_type<_Tp>);
105 template <__simd_
size_type _Np, __vec_builtin _TV>
106 using __resize_vec_builtin_t = __vec_builtin_type<__vec_value_type<_TV>, _Np>;
108 template <__vec_builtin _TV>
109 requires (__width_of<_TV> > 1)
110 using __half_vec_builtin_t = __resize_vec_builtin_t<__width_of<_TV> / 2, _TV>;
112 template <__vec_builtin _TV>
113 using __double_vec_builtin_t = __resize_vec_builtin_t<__width_of<_TV> * 2, _TV>;
115 template <
typename _Up, __vec_builtin _TV>
116 [[__gnu__::__always_inline__]]
117 constexpr __vec_builtin_type_bytes<_Up,
sizeof(_TV)>
118 __vec_bit_cast(_TV __v)
119 {
return reinterpret_cast<__vec_builtin_type_bytes<_Up, sizeof(_TV)
>>(__v); }
// Mask constant for vectors with signed integral elements: the lambda builds
// a _TV whose element at index i is -1 when i < _Np and 0 otherwise, with the
// indices taken from the integer_sequence argument.
// NOTE(review): the end of the lambda and the argument it is invoked with
// are on lines missing from this listing.
121 template <
int _Np, __vec_builtin _TV>
122 requires signed_integral<__vec_value_type<_TV>>
123 static constexpr _TV _S_vec_implicit_mask = []<
int... _Is> (
integer_sequence<int, _Is...>) {
124 return _TV{ (_Is < _Np ? -1 : 0)... };
// Returns element __i of vector __v.  The visible statement bit-casts the
// vector to an array of its elements and indexes that array.
// NOTE(review): the braces and the lines around this return (listing lines
// 134-137, 139 onward) are missing; there may be a second code path.
130 template <__vec_builtin _TV>
131 [[__gnu__::__always_inline__]]
132 constexpr __vec_value_type<_TV>
133 __vec_get(_TV __v,
int __i)
138 return __builtin_bit_cast(array<__vec_value_type<_TV>, __width_of<_TV>>, __v)[__i];
// Writes __x into element __i of vector __v (modified in place).
// Two strategies are visible:
//  - bit-cast the vector to an array and bit-cast the array back into __v
//    (the line that stores into the array is missing from this listing);
//  - rebuild the vector element by element, taking __x where the index equals
//    __i and the old element everywhere else.
// NOTE(review): the return type and the conditions selecting between the two
// strategies are on lines missing from this listing.
151 template <__vec_builtin _TV>
152 [[__gnu__::__always_inline__]]
154 __vec_set(_TV& __v,
int __i, __vec_value_type<_TV> __x)
159 auto __arr = __builtin_bit_cast(array<__vec_value_type<_TV>, __width_of<_TV>>, __v);
161 __v = __builtin_bit_cast(_TV, __arr);
163 constexpr auto [...__j] = _IotaArray<__width_of<_TV>>;
164 __v = _TV{(__i == __j ? __x : __v[__j])...};
// Joins two vectors of the same type into one vector with twice as many
// elements, using a shuffle over the pair (__a, __b).
// The shuffle indices come from _IotaArray<2 * width>; presumably the values
// 0 .. 2*width-1, i.e. all of __a followed by all of __b - confirm against
// the definition of _IotaArray.
176 template <__vec_builtin _TV>
177 [[__gnu__::__always_inline__]]
178 constexpr __vec_builtin_type<__vec_value_type<_TV>, __width_of<_TV> * 2>
179 __vec_concat(_TV __a, _TV __b)
181 constexpr auto [...__is] = _IotaArray<__width_of<_TV> * 2>;
182 return __builtin_shufflevector(__a, __b, __is...);
// Declaration of __vec_concat_sized for two or more vectors.  The result has
// the element type of _TV0 and __bit_ceil(_N0 + _N1 + ... + _Ns) elements.
// NOTE(review): the template head (before listing line 197) is missing from
// this listing.
197 [[__gnu__::__always_inline__]]
198 constexpr __vec_builtin_type<__vec_value_type<_TV0>,
199 __bit_ceil(
unsigned(_N0 + (_N1 + ... + _Ns)))>
200 __vec_concat_sized(
const _TV0& __a,
const _TV1& __b,
const _TVs&... __rest);
// Overload for three or more vectors, usable only when _N0 satisfies
// __has_single_bit and _N0 >= _N1 + _N2.  It first concatenates __b and __c,
// then concatenates __a with that intermediate result and the remaining
// arguments.
// NOTE(review): the template head and the braces are missing from this
// listing.
204 requires (__has_single_bit(
unsigned(_N0))) && (_N0 >= (_N1 + _N2))
205 [[__gnu__::__always_inline__]]
206 constexpr __vec_builtin_type<__vec_value_type<_TV0>,
207 __bit_ceil(
unsigned(_N0 + _N1 + (_N2 + ... + _Ns)))>
208 __vec_concat_sized(
const _TV0& __a,
const _TV1& __b,
const _TV2& __c,
const _TVs&... __rest)
210 return __vec_concat_sized<_N0, _N1 + _N2, _Ns...>(
211 __a, __vec_concat_sized<_N1, _N2>(__b, __c), __rest...);
// Definition of the general __vec_concat_sized overload.
// __a and __b are merged by a single shuffle producing
// __bit_ceil(_N0 + _N1) elements; each result index is mapped by a consteval
// lambda.  In the visible branch, a result index i with i < _N0 + _N1 selects
// shuffle index i - _N0 + __width_of<_TV0>, i.e. element i - _N0 of __b.
// When further vectors remain, the merged vector (now counted as _N0 + _N1
// elements) is concatenated with them recursively.
// NOTE(review): the template head, the other lambda branches, the call that
// applies the lambda to the indices, and the body of the
// sizeof...(__rest) == 0 case are on lines missing from this listing.
216 [[__gnu__::__always_inline__]]
217 constexpr __vec_builtin_type<__vec_value_type<_TV0>,
218 __bit_ceil(
unsigned(_N0 + (_N1 + ... + _Ns)))>
219 __vec_concat_sized(
const _TV0& __a,
const _TV1& __b,
const _TVs&... __rest)
222 constexpr auto [...__is] = _IotaArray<__bit_ceil(
unsigned(_N0 + _N1)),
int>;
223 const auto __ab = __builtin_shufflevector(__a, __b, [](
int __i)
consteval {
226 else if (__i < _N0 + _N1)
227 return __i - _N0 + __width_of<_TV0>;
231 if constexpr (
sizeof...(__rest) == 0)
234 return __vec_concat_sized<_N0 + _N1, _Ns...>(__ab, __rest...);
// Returns a half-width vector built by shuffling __v with the indices from
// _IotaArray<__n>, where __n is half the width of _TV.  Presumably these are
// the indices 0 .. __n-1 (the lower half) - confirm against _IotaArray.
237 template <__vec_builtin _TV>
238 [[__gnu__::__always_inline__]]
239 constexpr __half_vec_builtin_t<_TV>
240 __vec_split_lo(_TV __v)
242 constexpr int __n = __width_of<_TV> / 2;
243 constexpr auto [...__is] = _IotaArray<__n>;
244 return __builtin_shufflevector(__v, __v, __is...);
// Returns a half-width vector built by shuffling __v with the indices from
// _IotaArray<__n>, each offset by __n (half the width of _TV).  Presumably
// this selects the upper half of __v - confirm against _IotaArray.
247 template <__vec_builtin _TV>
248 [[__gnu__::__always_inline__]]
249 constexpr __half_vec_builtin_t<_TV>
250 __vec_split_hi(_TV __v)
252 constexpr int __n = __width_of<_TV> / 2;
253 constexpr auto [...__is] = _IotaArray<__n>;
254 return __builtin_shufflevector(__v, __v, (__n + __is)...);
// Widens vector __x to a vector occupying _Bytes bytes.
// Cases, checked in order:
//  - sizeof(_TV) == _Bytes: (body line missing from this listing);
//  - sizeof(_TV) <= sizeof(0ull): the whole vector is bit-cast to one
//    unsigned integer (_UInt<sizeof(_TV)>), which becomes the only listed
//    initializer of a _Bytes-sized integer vector; that vector is then
//    bit-cast to the element type of _TV;
//  - sizeof(_TV) < _Bytes: __x is concatenated with a value-initialized _TV
//    and the function recurses on the doubled vector;
//  - otherwise: compile-time failure via static_assert(false).
// NOTE(review): the return-type line and the braces are missing from this
// listing.
262 template <
size_t _Bytes, __vec_builtin _TV>
263 [[__gnu__::__always_inline__]]
265 __vec_zero_pad_to(_TV __x)
267 if constexpr (
sizeof(_TV) == _Bytes)
269 else if constexpr (
sizeof(_TV) <=
sizeof(0ull))
271 using _Up = _UInt<
sizeof(_TV)>;
272 __vec_builtin_type_bytes<_Up, _Bytes> __tmp = {__builtin_bit_cast(_Up, __x)};
273 return __builtin_bit_cast(__vec_builtin_type_bytes<__vec_value_type<_TV>, _Bytes>, __tmp);
275 else if constexpr (
sizeof(_TV) < _Bytes)
276 return __vec_zero_pad_to<_Bytes>(__vec_concat(__x, _TV()));
278 static_assert(
false);
// Shorthand for __vec_zero_pad_to<16>(__x).  Only valid for vectors smaller
// than 16 bytes (enforced by the static_assert).
// NOTE(review): the return-type line and the braces are missing from this
// listing.
286 template <__vec_builtin _TV>
287 [[__gnu__::__always_inline__]]
289 __vec_zero_pad_to_16(_TV __x)
291 static_assert(
sizeof(_TV) < 16);
292 return __vec_zero_pad_to<16>(__x);
// Single-argument form: returns the result of __builtin_constant_p(__x).
// NOTE(review): the return-type line and the braces are missing from this
// listing.
297 template <
typename _Tp>
298 [[__gnu__::__always_inline__]]
300 __is_const_known(
const _Tp& __x)
302 return __builtin_constant_p(__x);
// Variadic form for two or more arguments: true only when __is_const_known
// holds for every argument (fold over &&).
// NOTE(review): the return-type line and lines 308-314 of the listing are
// missing; there may be additional code before the return.
305 [[__gnu__::__always_inline__]]
307 __is_const_known(
const auto&... __xs)
requires(
sizeof...(__xs) >= 2)
315 return (__is_const_known(__xs) && ...);
// True when the comparison __x == __expect is const-known (see
// __is_const_known) and also evaluates to true.
// NOTE(review): the return-type line is missing from this listing.
319 [[__gnu__::__always_inline__]]
321 __is_const_known_equal_to(
const auto& __x,
const auto& __expect)
322 {
return __is_const_known(__x == __expect) && __x == __expect; }
// Declaration only: takes a vector __v of type _TV, with the target vector
// type _UV given explicitly.  The vector conversion code below calls it as
// __x86_cvt_f16c<_UV>(__v) when __needs_f16c is true.
// NOTE(review): the return-type line is missing from this listing and the
// definition is not part of it.
325 template <__vec_builtin _UV, __vec_builtin _TV>
327 __x86_cvt_f16c(_TV __v);
// Element-wise conversion of vector __v (type _TV) to vector type _UV; both
// must have the same number of elements (static_assert).
// Visible paths:
//  - __needs_f16c (the traits report F16C but not AVX512-FP16, and either
//    side has _Float16 elements): delegates to __x86_cvt_f16c<_UV>; a runtime
//    `if` additionally checks that __v is not const-known;
//  - floating-point source, integral destination narrower than int, and a
//    destination vector smaller than the source: converts in two steps via an
//    integer type of min(sizeof(int), sizeof(_Tp)) bytes;
//  - otherwise: __builtin_convertvector.
// NOTE(review): the template head, the function name and parameter line, and
// the braces are missing from this listing.  The name __vec_cast is taken
// from the recursive call below.
337 [[__gnu__::__always_inline__]]
341 static_assert(__width_of<_UV> == __width_of<_TV>);
343 using _Up = __vec_value_type<_UV>;
344 using _Tp = __vec_value_type<_TV>;
345 constexpr bool __to_f16 = is_same_v<_Up, _Float16>;
346 constexpr bool __from_f16 = is_same_v<_Tp, _Float16>;
347 constexpr bool __needs_f16c = _Traits._M_have_f16c() && !_Traits._M_have_avx512fp16()
348 && (__to_f16 || __from_f16);
349 if (__needs_f16c && !__is_const_known(__v))
351 if constexpr (__needs_f16c)
352 return __x86_cvt_f16c<_UV>(__v);
354 if constexpr (is_floating_point_v<_Tp> && is_integral_v<_Up>
355 &&
sizeof(_UV) <
sizeof(_TV) &&
sizeof(_Up) <
sizeof(int))
357 using _Ip = __integer_from<
std::min(
sizeof(
int),
sizeof(_Tp))>;
358 using _IV = __vec_builtin_type<_Ip, __width_of<_TV>>;
359 return __vec_cast<_UV>(__vec_cast<_IV>(__v));
362 return __builtin_convertvector(__v, _UV);
// Overload taking the destination element type _Up instead of a vector type:
// forwards to __vec_cast with the vector of _Up that has as many elements as
// _TV.
// NOTE(review): the line carrying the function name and parameter list
// (listing line 374) is missing from this listing.
371 template <__vectorizable _Up, __vec_builtin _TV>
372 [[__gnu__::__always_inline__]]
373 constexpr __vec_builtin_type<_Up, __width_of<_TV>>
375 {
return __vec_cast<__vec_builtin_type<_Up, __width_of<_TV>>>(__v); }
// Converts mask vector __k to the mask vector type _UV.  Both the source and
// the destination element type must be signed integral (static_asserts).
// The visible return uses __builtin_convertvector.
// NOTE(review): the return-type line and listing lines 389-390 are missing;
// there may be another code path before the final return.
382 template <__vec_builtin _UV, __vec_builtin _TV>
383 [[__gnu__::__always_inline__]]
385 __vec_mask_cast(_TV __k)
387 static_assert(signed_integral<__vec_value_type<_UV>>);
388 static_assert(signed_integral<__vec_value_type<_TV>>);
391 return __builtin_convertvector(__k, _UV);
// Bitwise XOR of two vectors of the same type.  For floating-point elements
// both operands are bit-cast to an integer vector with the same element size
// and element count, XORed, and the result is bit-cast back to _TV.
// NOTE(review): the return-type line and the non-floating-point branch are
// missing from this listing.
394 template <__vec_builtin _TV>
395 [[__gnu__::__always_inline__]]
397 __vec_xor(_TV __a, _TV __b)
399 using _Tp = __vec_value_type<_TV>;
400 if constexpr (is_floating_point_v<_Tp>)
402 using _UV = __vec_builtin_type<__integer_from<
sizeof(_Tp)>, __width_of<_TV>>;
403 return __builtin_bit_cast(
404 _TV, __builtin_bit_cast(_UV, __a) ^ __builtin_bit_cast(_UV, __b));
// Bitwise OR of two vectors of the same type.  For floating-point elements
// both operands are bit-cast to an integer vector with the same element size
// and element count, ORed, and the result is bit-cast back to _TV.
// NOTE(review): the return-type line and the non-floating-point branch are
// missing from this listing.
410 template <__vec_builtin _TV>
411 [[__gnu__::__always_inline__]]
413 __vec_or(_TV __a, _TV __b)
415 using _Tp = __vec_value_type<_TV>;
416 if constexpr (is_floating_point_v<_Tp>)
418 using _UV = __vec_builtin_type<__integer_from<
sizeof(_Tp)>, __width_of<_TV>>;
419 return __builtin_bit_cast(
420 _TV, __builtin_bit_cast(_UV, __a) | __builtin_bit_cast(_UV, __b));
// Bitwise AND of two vectors of the same type.  For floating-point elements
// both operands are bit-cast to an integer vector with the same element size
// and element count, ANDed, and the result is bit-cast back to _TV.
// NOTE(review): the return-type line and the non-floating-point branch are
// missing from this listing.
426 template <__vec_builtin _TV>
427 [[__gnu__::__always_inline__]]
429 __vec_and(_TV __a, _TV __b)
431 using _Tp = __vec_value_type<_TV>;
432 if constexpr (is_floating_point_v<_Tp>)
434 using _UV = __vec_builtin_type<__integer_from<
sizeof(_Tp)>, __width_of<_TV>>;
435 return __builtin_bit_cast(
436 _TV, __builtin_bit_cast(_UV, __a) & __builtin_bit_cast(_UV, __b));
// Computes (~__a) & __b bitwise: both operands are bit-cast to an integer
// vector with the same element size and element count, the first one is
// complemented, and the ANDed result is bit-cast back to _TV.
// NOTE(review): the return-type line and the braces are missing from this
// listing.
449 template <__vec_builtin _TV>
450 [[__gnu__::__always_inline__]]
452 __vec_andnot(_TV __a, _TV __b)
454 using _Tp = __vec_value_type<_TV>;
455 using _UV = __vec_builtin_type<__integer_from<
sizeof(_Tp)>, __width_of<_TV>>;
456 return __builtin_bit_cast(
457 _TV, ~__builtin_bit_cast(_UV, __a) & __builtin_bit_cast(_UV, __b));
// Bitwise complement of a vector __a.  For floating-point elements the vector
// is bit-cast to an integer vector of the same byte size, complemented, and
// bit-cast back to _TV.
// NOTE(review): the return type, the function name and parameter list
// (listing lines 462-464) and the non-floating-point branch are missing from
// this listing.
460 template <__vec_builtin _TV>
461 [[__gnu__::__always_inline__]]
465 using _Tp = __vec_value_type<_TV>;
466 using _UV = __vec_builtin_type_bytes<__integer_from<
sizeof(_Tp)>,
sizeof(_TV)>;
467 if constexpr (is_floating_point_v<__vec_value_type<_TV>>)
468 return __builtin_bit_cast(_TV, ~__builtin_bit_cast(_UV, __a));
476 template <__vec_builtin _V>
477 requires std::floating_point<__vec_value_type<_V>>
478 constexpr _V _S_signmask = __vec_xor(_V() + 1, _V() - 1);
// Template head with defaults: _Np defaults to the full width of _TV, and the
// unnamed third parameter defaults to make_integer_sequence<int, _Np>, which
// supplies the index pack used by the specialization that follows.
// NOTE(review): the declaration this head belongs to (listing lines 482-484)
// is missing from this listing.
480 template <__vec_builtin _TV,
int _Np = __w
idth_of<_TV>,
481 typename = make_
integer_sequence<
int, _Np>>
// Specialization that receives the indices as the pack _Is.
// _Np may not exceed the width of _TV.
// _Tp is the element type of _TV.
// _HV is the half-width vector of _TV; when _Np < 2 it is the half of the
// doubled vector instead, so that the alias is still well-formed.
// NOTE(review): the template head and the opening brace of the struct are
// missing from this listing.
485 struct _VecOps<_TV, _Np, integer_sequence<int, _Is...>>
487 static_assert(_Np <= __width_of<_TV>);
489 using _Tp = __vec_value_type<_TV>;
491 using _HV = __half_vec_builtin_t<__conditional_t<_Np >= 2, _TV, __double_vec_builtin_t<_TV>>>;
// Builds a _TV holding __init at every even index in _Is and a
// value-initialized _Tp at every odd index.
// NOTE(review): the return-type line is missing from this listing.
493 [[__gnu__::__always_inline__]]
495 _S_broadcast_to_even(_Tp __init)
496 {
return _TV {((_Is & 1) == 0 ? __init : _Tp())...}; }
// Builds a _TV holding __init at every odd index in _Is and a
// value-initialized _Tp at every even index.
// NOTE(review): the return-type line is missing from this listing.
498 [[__gnu__::__always_inline__]]
500 _S_broadcast_to_odd(_Tp __init)
501 {
return _TV {((_Is & 1) == 1 ? __init : _Tp())...}; }
503 [[__gnu__::__always_inline__]]
504 static constexpr bool
505 _S_all_of(_TV __k)
noexcept
506 {
return (... && (__k[_Is] != 0)); }
508 [[__gnu__::__always_inline__]]
509 static constexpr bool
510 _S_any_of(_TV __k)
noexcept
511 {
return (... || (__k[_Is] != 0)); }
513 [[__gnu__::__always_inline__]]
514 static constexpr bool
515 _S_none_of(_TV __k)
noexcept
516 {
return (... && (__k[_Is] == 0)); }
// Extracts elements from a vector __x whose element type must match that of
// _TV (static_assert).  The result is a shuffle of __x with a
// value-initialized vector of the same type, selecting the indices
// _Is + _Offset::value; _Offset defaults to integral_constant<int, 0>.
// NOTE(review): the return-type line and the braces are missing from this
// listing.
518 template <
typename _Offset =
integral_constant<
int, 0>>
519 [[__gnu__::__always_inline__]]
521 _S_extract(__vec_builtin
auto __x, _Offset = {})
523 static_assert(is_same_v<__vec_value_type<_TV>, __vec_value_type<
decltype(__x)>>);
524 return __builtin_shufflevector(__x,
decltype(__x)(), (_Is + _Offset::value)...);
// Shuffles __x so that result element i is source element i ^ 1, i.e. the
// two elements of each even/odd pair trade places.
// NOTE(review): the return-type line is missing from this listing.
528 [[__gnu__::__always_inline__]]
530 _S_swap_neighbors(_TV __x)
531 {
return __builtin_shufflevector(__x, __x, (_Is ^ 1)...); }
// Shuffles __x so that result element i is source element i & ~1: both
// elements of each pair receive the value of the even-indexed one.
// NOTE(review): the return type, name and parameter list (listing lines
// 535-536) are missing from this listing.
534 [[__gnu__::__always_inline__]]
537 {
return __builtin_shufflevector(__x, __x, (_Is & ~1)...); }
// Shuffles __x so that result element i is source element i | 1: both
// elements of each pair receive the value of the odd-indexed one.
// NOTE(review): the return type, name and parameter list (listing lines
// 541-542) are missing from this listing.
540 [[__gnu__::__always_inline__]]
543 {
return __builtin_shufflevector(__x, __x, (_Is | 1)...); }
// Replaces the even-indexed elements of __x in place with consecutive
// elements of the half-width vector __y: result element i (i even) takes
// __y[i / 2]; odd elements of __x are kept.  __y is concatenated with itself
// so that the second shuffle operand has the same width as __x; shuffle
// indices >= __n address that second operand.  Requires _Np > 1.
// NOTE(review): listing lines 551 and 553-555 inside the shuffle call are
// missing from this listing; there may be an alternative second operand.
545 [[__gnu__::__always_inline__]]
546 static constexpr void
547 _S_overwrite_even_elements(_TV& __x, _HV __y)
requires (_Np > 1)
549 constexpr __simd_size_type __n = __width_of<_TV>;
550 __x = __builtin_shufflevector(__x,
552 __vec_concat(__y, __y),
556 ((_Is & 1) == 0 ? __n + _Is / 2 : _Is)...);
// Two-register variant: overwrites the even-indexed elements of __xl and __xh
// in place with elements of __y.  For even i, __xl[i] takes __y[i / 2]
// (shuffle index __nl + i / 2) and __xh[i] takes __y[__nl / 2 + i / 2]
// (shuffle index __nh + i / 2 with __nh = __nl * 3 / 2).  Odd elements are
// kept.
559 [[__gnu__::__always_inline__]]
560 static constexpr void
561 _S_overwrite_even_elements(_TV& __xl, _TV& __xh, _TV __y)
563 constexpr __simd_size_type __nl = __width_of<_TV>;
564 constexpr __simd_size_type __nh = __nl * 3 / 2;
565 __xl = __builtin_shufflevector(__xl, __y, ((_Is & 1) == 0 ? __nl + _Is / 2 : _Is)...);
566 __xh = __builtin_shufflevector(__xh, __y, ((_Is & 1) == 0 ? __nh + _Is / 2 : _Is)...);
// Replaces the odd-indexed elements of __x in place with consecutive elements
// of the half-width vector __y: result element i (i odd) takes __y[i / 2];
// even elements of __x are kept.  __y is concatenated with itself so that the
// second shuffle operand has the same width as __x; shuffle indices >= __n
// address that second operand.  Requires _Np > 1.
// NOTE(review): listing lines 575 and 577-579 inside the shuffle call are
// missing from this listing; there may be an alternative second operand.
569 [[__gnu__::__always_inline__]]
570 static constexpr void
571 _S_overwrite_odd_elements(_TV& __x, _HV __y)
requires (_Np > 1)
573 constexpr __simd_size_type __n = __width_of<_TV>;
574 __x = __builtin_shufflevector(__x,
576 __vec_concat(__y, __y),
580 ((_Is & 1) == 1 ? __n + _Is / 2 : _Is)...);
// Two-register variant: overwrites the odd-indexed elements of __xl and __xh
// in place with elements of __y.  For odd i, __xl[i] takes __y[i / 2]
// (shuffle index __nl + i / 2) and __xh[i] takes __y[__nl / 2 + i / 2]
// (shuffle index __nh + i / 2 with __nh = __nl * 3 / 2).  Even elements are
// kept.
583 [[__gnu__::__always_inline__]]
584 static constexpr void
585 _S_overwrite_odd_elements(_TV& __xl, _TV& __xh, _TV __y)
587 constexpr __simd_size_type __nl = __width_of<_TV>;
588 constexpr __simd_size_type __nh = __nl * 3 / 2;
589 __xl = __builtin_shufflevector(__xl, __y, ((_Is & 1) == 1 ? __nl + _Is / 2 : _Is)...);
590 __xh = __builtin_shufflevector(__xh, __y, ((_Is & 1) == 1 ? __nh + _Is / 2 : _Is)...);
594 [[__gnu__::__always_inline__]]
595 static constexpr bool
596 _S_is_const_known_equal_to(_TV __x, _Tp __ref)
597 {
return (__is_const_known_equal_to(__x[_Is], __ref) && ...); }
601_GLIBCXX_END_NAMESPACE_VERSION
604#pragma GCC diagnostic pop
typename make_unsigned< _Tp >::type make_unsigned_t
Alias template for make_unsigned.
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
ISO C++ entities toplevel namespace is std.
__make_integer_seq< integer_sequence, _Tp, _Num > make_integer_sequence
Alias template make_integer_sequence.
Class template integer_sequence.
[concept.same], concept same_as