libstdc++
simd.h
1 // Definition of the public simd interfaces -*- C++ -*-
2 
3 // Copyright (C) 2020-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H
27 
28 #if __cplusplus >= 201703L
29 
30 #include "simd_detail.h"
31 #include "numeric_traits.h"
32 #include <bit>
33 #include <bitset>
34 #ifdef _GLIBCXX_DEBUG_UB
35 #include <cstdio> // for stderr
36 #endif
37 #include <cstring>
38 #include <cmath>
39 #include <functional>
40 #include <iosfwd>
41 #include <utility>
42 
43 #if _GLIBCXX_SIMD_X86INTRIN
44 #include <x86intrin.h>
45 #elif _GLIBCXX_SIMD_HAVE_NEON
46 #pragma GCC diagnostic push
47 // narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
48 // 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
49 #pragma GCC diagnostic ignored "-Wnarrowing"
50 #include <arm_neon.h>
51 #pragma GCC diagnostic pop
52 #endif
53 
54 /** @ingroup ts_simd
55  * @{
56  */
57 /* There are several closely related types, with the following naming
58  * convention:
59  * _Tp: vectorizable (arithmetic) type (or any type)
60  * _TV: __vector_type_t<_Tp, _Np>
61  * _TW: _SimdWrapper<_Tp, _Np>
62  * _TI: __intrinsic_type_t<_Tp, _Np>
63  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
64  * If one additional type is needed use _U instead of _T.
65  * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
66  *
67  * More naming conventions:
68  * _Ap or _Abi: An ABI tag from the simd_abi namespace
69  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
70  * _IV, _IW as for _TV, _TW
71  * _Np: number of elements (not bytes)
72  * _Bytes: number of bytes
73  *
74  * Variable names:
75  * __k: mask object (vector- or bitmask)
76  */
77 _GLIBCXX_SIMD_BEGIN_NAMESPACE
78 
#if !_GLIBCXX_SIMD_X86INTRIN
// Without _GLIBCXX_SIMD_X86INTRIN the header <x86intrin.h> is not included
// above, so the __m* names are declared here as GNU vector types of 16, 32
// and 64 bytes.

// float elements
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m512 [[__gnu__::__vector_size__(64)]] = float;

// double elements
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m512d [[__gnu__::__vector_size__(64)]] = double;

// long long elements
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif
90 
namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:

// ABI tag that simd_abi::scalar refers to.
struct _Scalar;

// ABI tag that simd_abi::fixed_size<_Np> refers to.
template <int _Np>
  struct _Fixed;

// Two major ABIs appear on the different architectures. Both pack non-boolean
// values into an N byte register, hence #elements = N / sizeof(T).
// What differs is the representation of masks:
// 1. masks are stored in value vector registers (each element all 0 or all 1)
// 2. masks are bitmasks (mask registers) holding one bit per element of the
//    corresponding value vector
//
// Either kind may be used partially; the unused rest is masked off for
// horizontal operations and for operations that can trap (e.g. FP_INVALID or
// integer division by 0). The template argument is the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

// _VecBuiltin sized to hold _Np elements of type _Tp.
template <class _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

// Per-ISA spellings; the default argument is the full register width in bytes.
template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<16>;
using __avx = _Avx<32>;
using __avx512 = _Avx512<64>;
using __neon = _Neon<16>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <class _Tp, size_t _Np, class...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
150 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{
// Declarations only; the definitions are not part of this section.

// traits
template <class _Tp>
  struct is_simd;

template <class _Tp>
  struct is_simd_mask;

template <class _Tp, class _Abi>
  struct simd_size;

// data-parallel types
template <class _Tp, class _Abi>
  class simd;

template <class _Tp, class _Abi>
  class simd_mask;
165 
166 // }}}
167 // load/store flags {{{
168 struct element_aligned_tag
169 {
170  template <typename _Tp, typename _Up = typename _Tp::value_type>
171  static constexpr size_t _S_alignment = alignof(_Up);
172 
173  template <typename _Tp, typename _Up>
174  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
175  _S_apply(_Up* __ptr)
176  { return __ptr; }
177 };
178 
179 struct vector_aligned_tag
180 {
181  template <typename _Tp, typename _Up = typename _Tp::value_type>
182  static constexpr size_t _S_alignment
183  = std::__bit_ceil(sizeof(_Up) * _Tp::size());
184 
185  template <typename _Tp, typename _Up>
186  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
187  _S_apply(_Up* __ptr)
188  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
189 };
190 
191 template <size_t _Np> struct overaligned_tag
192 {
193  template <typename _Tp, typename _Up = typename _Tp::value_type>
194  static constexpr size_t _S_alignment = _Np;
195 
196  template <typename _Tp, typename _Up>
197  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
198  _S_apply(_Up* __ptr)
199  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
200 };
201 
202 inline constexpr element_aligned_tag element_aligned = {};
203 
204 inline constexpr vector_aligned_tag vector_aligned = {};
205 
206 template <size_t _Np>
207  inline constexpr overaligned_tag<_Np> overaligned = {};
208 
209 // }}}
// Shorthand for integral_constant<size_t, _Value>.
template <size_t _Value>
  using _SizeConstant = integral_constant<size_t, _Value>;
212 // constexpr feature detection{{{
213 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
214 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
215 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
216 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
217 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
218 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
219 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
220 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
221 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
222 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
223 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
224 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
225 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
226 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
227 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
228 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
229 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
230 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
231 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
232 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
233 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
234 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
235 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
236 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
237 constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
238 constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
239 constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
240 constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
241 constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
242 constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
243 constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
244 constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;
245 
246 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
247 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
248 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
249 constexpr inline bool __support_neon_float =
250 #if defined __GCC_IEC_559
251  __GCC_IEC_559 == 0;
252 #elif defined __FAST_MATH__
253  true;
254 #else
255  false;
256 #endif
257 
// POWER: a flag is true if the compiler predefines the tested macro. The
// power8vec, vsx and vmx flags otherwise take the value of the flag defined
// immediately before them (power9vec, power8vec and vsx respectively).
#ifndef _ARCH_PWR10
inline constexpr bool __have_power10vec = false;
#else
inline constexpr bool __have_power10vec = true;
#endif

#ifndef __POWER9_VECTOR__
inline constexpr bool __have_power9vec = false;
#else
inline constexpr bool __have_power9vec = true;
#endif

#ifndef __POWER8_VECTOR__
inline constexpr bool __have_power8vec = __have_power9vec;
#else
inline constexpr bool __have_power8vec = true;
#endif

#ifndef __VSX__
inline constexpr bool __have_power_vsx = __have_power8vec;
#else
inline constexpr bool __have_power_vsx = true;
#endif

#ifndef __ALTIVEC__
inline constexpr bool __have_power_vmx = __have_power_vsx;
#else
inline constexpr bool __have_power_vmx = true;
#endif
283 
284 // }}}
285 
286 namespace __detail
287 {
#ifdef math_errhandling
// Preferred overload (exact match for an int argument): derives the answer
// from math_errhandling. It only takes part in overload resolution if
// math_errhandling is usable as a template argument, i.e. is a constant
// expression; math_errhandling may expand to an extern symbol, in which case
// the fallback below is selected instead.
template <int = math_errhandling>
  constexpr bool
  __handle_fpexcept_impl(int)
  { return (math_errhandling & MATH_ERREXCEPT) != 0; }
#endif

// Fallback when math_errhandling cannot be used: assume floating-point
// exceptions are ignored with fast-math and must be honored otherwise.
constexpr bool
__handle_fpexcept_impl(float)
{
#ifndef __FAST_MATH__
  return true;
#else
  return false;
#endif
}

/// True if math functions must raise floating-point exceptions as specified by C17.
static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
312 
313  constexpr std::uint_least64_t
314  __floating_point_flags()
315  {
316  std::uint_least64_t __flags = 0;
317  if constexpr (_S_handle_fpexcept)
318  __flags |= 1;
319 #ifdef __FAST_MATH__
320  __flags |= 1 << 1;
321 #elif __FINITE_MATH_ONLY__
322  __flags |= 2 << 1;
323 #elif __GCC_IEC_559 < 2
324  __flags |= 3 << 1;
325 #endif
326  __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
327  return __flags;
328  }
329 
330  constexpr std::uint_least64_t
331  __machine_flags()
332  {
333  if constexpr (__have_mmx || __have_sse)
334  return __have_mmx
335  | (__have_sse << 1)
336  | (__have_sse2 << 2)
337  | (__have_sse3 << 3)
338  | (__have_ssse3 << 4)
339  | (__have_sse4_1 << 5)
340  | (__have_sse4_2 << 6)
341  | (__have_xop << 7)
342  | (__have_avx << 8)
343  | (__have_avx2 << 9)
344  | (__have_bmi << 10)
345  | (__have_bmi2 << 11)
346  | (__have_lzcnt << 12)
347  | (__have_sse4a << 13)
348  | (__have_fma << 14)
349  | (__have_fma4 << 15)
350  | (__have_f16c << 16)
351  | (__have_popcnt << 17)
352  | (__have_avx512f << 18)
353  | (__have_avx512dq << 19)
354  | (__have_avx512vl << 20)
355  | (__have_avx512bw << 21)
356  | (__have_avx512bitalg << 22)
357  | (__have_avx512vbmi2 << 23)
358  | (__have_avx512vbmi << 24)
359  | (__have_avx512ifma << 25)
360  | (__have_avx512cd << 26)
361  | (__have_avx512vnni << 27)
362  | (__have_avx512vpopcntdq << 28)
363  | (__have_avx512vp2intersect << 29);
364  else if constexpr (__have_neon)
365  return __have_neon
366  | (__have_neon_a32 << 1)
367  | (__have_neon_a64 << 2)
368  | (__have_neon_a64 << 2)
369  | (__support_neon_float << 3);
370  else if constexpr (__have_power_vmx)
371  return __have_power_vmx
372  | (__have_power_vsx << 1)
373  | (__have_power8vec << 2)
374  | (__have_power9vec << 3)
375  | (__have_power10vec << 4);
376  else
377  return 0;
378  }
379 
380  namespace
381  {
382  struct _OdrEnforcer {};
383  }
384 
385  template <std::uint_least64_t...>
386  struct _MachineFlagsTemplate {};
387 
388  /**@internal
389  * Use this type as default template argument to all function templates that
390  * are not declared always_inline. It ensures, that a function
391  * specialization, which the compiler decides not to inline, has a unique symbol
392  * (_OdrEnforcer) or a symbol matching the machine/architecture flags
393  * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
394  * users link TUs compiled with different flags. This is especially important
395  * for using simd in libraries.
396  */
397  using __odr_helper
398  = conditional_t<__machine_flags() == 0, _OdrEnforcer,
399  _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
400 
401  struct _Minimum
402  {
403  template <typename _Tp>
404  _GLIBCXX_SIMD_INTRINSIC constexpr
405  _Tp
406  operator()(_Tp __a, _Tp __b) const
407  {
408  using std::min;
409  return min(__a, __b);
410  }
411  };
412 
413  struct _Maximum
414  {
415  template <typename _Tp>
416  _GLIBCXX_SIMD_INTRINSIC constexpr
417  _Tp
418  operator()(_Tp __a, _Tp __b) const
419  {
420  using std::max;
421  return max(__a, __b);
422  }
423  };
424 } // namespace __detail
425 
426 // unrolled/pack execution helpers
427 // __execute_n_times{{{
428 template <typename _Fp, size_t... _I>
429  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
430  void
431  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
432  { ((void)__f(_SizeConstant<_I>()), ...); }
433 
434 template <typename _Fp>
435  _GLIBCXX_SIMD_INTRINSIC constexpr void
436  __execute_on_index_sequence(_Fp&&, index_sequence<>)
437  { }
438 
439 template <size_t _Np, typename _Fp>
440  _GLIBCXX_SIMD_INTRINSIC constexpr void
441  __execute_n_times(_Fp&& __f)
442  {
443  __execute_on_index_sequence(static_cast<_Fp&&>(__f),
444  make_index_sequence<_Np>{});
445  }
446 
447 // }}}
448 // __generate_from_n_evaluations{{{
449 template <typename _R, typename _Fp, size_t... _I>
450  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
451  _R
452  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
453  { return _R{__f(_SizeConstant<_I>())...}; }
454 
455 template <size_t _Np, typename _R, typename _Fp>
456  _GLIBCXX_SIMD_INTRINSIC constexpr _R
457  __generate_from_n_evaluations(_Fp&& __f)
458  {
459  return __execute_on_index_sequence_with_return<_R>(
460  static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
461  }
462 
463 // }}}
464 // __call_with_n_evaluations{{{
465 template <size_t... _I, typename _F0, typename _FArgs>
466  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
467  auto
468  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
469  { return __f0(__fargs(_SizeConstant<_I>())...); }
470 
471 template <size_t _Np, typename _F0, typename _FArgs>
472  _GLIBCXX_SIMD_INTRINSIC constexpr auto
473  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
474  {
475  return __call_with_n_evaluations(make_index_sequence<_Np>{},
476  static_cast<_F0&&>(__f0),
477  static_cast<_FArgs&&>(__fargs));
478  }
479 
480 // }}}
481 // __call_with_subscripts{{{
482 template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
483  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
484  auto
485  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
486  { return __fun(__x[_First + _It]...); }
487 
488 template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
489  _GLIBCXX_SIMD_INTRINSIC constexpr auto
490  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
491  {
492  return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
493  make_index_sequence<_Np>(),
494  static_cast<_Fp&&>(__fun));
495  }
496 
497 // }}}
498 
499 // vvv ---- type traits ---- vvv
500 // integer type aliases{{{
// Short names for the multi-word integer types.
// signed:
using _SChar = signed char;
using _LLong = long long;
// unsigned:
using _UChar = unsigned char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
508 
509 //}}}
510 // __first_of_pack{{{
// Yields the first type of a non-empty template argument list as ::type.
template <typename _Head, typename...>
  struct __first_of_pack
  {
    using type = _Head;
  };

// Alias for the first type of the pack.
template <typename... _Pack>
  using __first_of_pack_t = typename __first_of_pack<_Pack...>::type;
517 
518 //}}}
519 // __value_type_or_identity_t {{{
// Declaration-only overload pair for use in decltype. The int overload is the
// better match for an int argument, but drops out via SFINAE if _Tp has no
// member type value_type; then the float overload is chosen.
template <typename _Tp>
  auto
  __value_type_or_identity_impl(int) -> typename _Tp::value_type;

template <typename _Tp>
  auto
  __value_type_or_identity_impl(float) -> _Tp;

// _Tp::value_type if that member type exists, otherwise _Tp itself.
template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(0));
531 
532 // }}}
533 // __is_vectorizable {{{
// True for all arithmetic types ...
template <typename _Tp>
  struct __is_vectorizable : is_arithmetic<_Tp> {};

// ... with the exception of bool.
template <>
  struct __is_vectorizable<bool> : false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Names _Tp itself, but is only well-formed for vectorizable _Tp (SFINAE).
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;
546 
547 // }}}
548 // _LoadStorePtr / __is_possible_loadstore_conversion {{{
549 template <typename _Ptr, typename _ValueType>
550  struct __is_possible_loadstore_conversion
551  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
552 
553 template <>
554  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
555 
556 // Deduces to a type allowed for load/store with the given value type.
557 template <typename _Ptr, typename _ValueType,
558  typename = enable_if_t<
559  __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
560  using _LoadStorePtr = _Ptr;
561 
562 // }}}
563 // __is_bitmask{{{
// Primary template: not a bitmask.
template <typename _Tp, typename = void>
  struct __is_bitmask : false_type {};

// the __mmaskXX case: selected whenever the expression
// `unsigned lvalue = _Tp value & 1u` is well-formed.
template <typename _Tp>
  struct __is_bitmask<_Tp, void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
575 
576 // }}}
577 // __int_for_sizeof{{{
578 #pragma GCC diagnostic push
579 #pragma GCC diagnostic ignored "-Wpedantic"
580 template <size_t _Bytes>
581  constexpr auto
582  __int_for_sizeof()
583  {
584  static_assert(_Bytes > 0);
585  if constexpr (_Bytes == sizeof(int))
586  return int();
587  else if constexpr (_Bytes == sizeof(_SChar))
588  return _SChar();
589  else if constexpr (_Bytes == sizeof(short))
590  return short();
591  else if constexpr (_Bytes == sizeof(long))
592  return long();
593  else if constexpr (_Bytes == sizeof(_LLong))
594  return _LLong();
595  #ifdef __SIZEOF_INT128__
596  else if constexpr (_Bytes == sizeof(__int128))
597  return __int128();
598  #endif // __SIZEOF_INT128__
599  else if constexpr (_Bytes % sizeof(int) == 0)
600  {
601  constexpr size_t _Np = _Bytes / sizeof(int);
602  struct _Ip
603  {
604  int _M_data[_Np];
605 
606  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
607  operator&(_Ip __rhs) const
608  {
609  return __generate_from_n_evaluations<_Np, _Ip>(
610  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
611  return __rhs._M_data[__i] & _M_data[__i];
612  });
613  }
614 
615  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
616  operator|(_Ip __rhs) const
617  {
618  return __generate_from_n_evaluations<_Np, _Ip>(
619  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
620  return __rhs._M_data[__i] | _M_data[__i];
621  });
622  }
623 
624  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
625  operator^(_Ip __rhs) const
626  {
627  return __generate_from_n_evaluations<_Np, _Ip>(
628  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
629  return __rhs._M_data[__i] ^ _M_data[__i];
630  });
631  }
632 
633  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
634  operator~() const
635  {
636  return __generate_from_n_evaluations<_Np, _Ip>(
637  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
638  }
639  };
640  return _Ip{};
641  }
642  else
643  static_assert(_Bytes == 0, "this should be unreachable");
644  }
645 #pragma GCC diagnostic pop
646 
647 template <typename _Tp>
648  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
649 
650 template <size_t _Np>
651  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
652 
653 // }}}
654 // __is_fixed_size_abi{{{
655 template <typename _Tp>
656  struct __is_fixed_size_abi : false_type {};
657 
658 template <int _Np>
659  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
660 
661 template <typename _Tp>
662  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
663 
664 // }}}
665 // __is_scalar_abi {{{
666 template <typename _Abi>
667  constexpr bool
668  __is_scalar_abi()
669  { return is_same_v<simd_abi::scalar, _Abi>; }
670 
671 // }}}
672 // __abi_bytes_v {{{
// Matches pointers to specializations of a class template with exactly one
// int template parameter and returns that argument.
template <template <int> class _AbiTemplate, int _UsedBytes>
  constexpr int
  __abi_bytes_impl(_AbiTemplate<_UsedBytes>*)
  { return _UsedBytes; }

// Any other type: -1.
template <typename _Other>
  constexpr int
  __abi_bytes_impl(_Other*)
  { return -1; }

// The int template argument of _Abi, or -1 if _Abi has no such form.
template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
686 
687 // }}}
688 // __is_builtin_bitmask_abi {{{
689 template <typename _Abi>
690  constexpr bool
691  __is_builtin_bitmask_abi()
692  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
693 
694 // }}}
695 // __is_sse_abi {{{
696 template <typename _Abi>
697  constexpr bool
698  __is_sse_abi()
699  {
700  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
701  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
702  }
703 
704 // }}}
705 // __is_avx_abi {{{
706 template <typename _Abi>
707  constexpr bool
708  __is_avx_abi()
709  {
710  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
711  return _Bytes > 16 && _Bytes <= 32
712  && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
713  }
714 
715 // }}}
716 // __is_avx512_abi {{{
717 template <typename _Abi>
718  constexpr bool
719  __is_avx512_abi()
720  {
721  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
722  return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
723  }
724 
725 // }}}
726 // __is_neon_abi {{{
727 template <typename _Abi>
728  constexpr bool
729  __is_neon_abi()
730  {
731  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
732  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
733  }
734 
735 // }}}
736 // __make_dependent_t {{{
// ::type is always the second argument; the first argument is ignored. The
// resulting type thereby depends on the first template argument.
template <typename, typename _Result>
  struct __make_dependent
  {
    using type = _Result;
  };

template <typename _Dep, typename _Result>
  using __make_dependent_t = typename __make_dependent<_Dep, _Result>::type;
743 
744 // }}}
745 // ^^^ ---- type traits ---- ^^^
746 
747 // __invoke_ub{{{
748 template <typename... _Args>
749  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
750  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
751  {
752 #ifdef _GLIBCXX_DEBUG_UB
753  __builtin_fprintf(stderr, __msg, __args...);
754  __builtin_trap();
755 #else
756  __builtin_unreachable();
757 #endif
758  }
759 
760 // }}}
761 // __assert_unreachable{{{
// Instantiating this class template always fails the static assertion. The
// condition depends on _Tp, so it is not checked before instantiation.
template <typename _Tp>
  struct __assert_unreachable
  {
    static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable");
  };
765 
766 // }}}
767 // __size_or_zero_v {{{
768 template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
769  constexpr size_t
770  __size_or_zero_dispatch(int)
771  { return _Np; }
772 
773 template <typename _Tp, typename _Ap>
774  constexpr size_t
775  __size_or_zero_dispatch(float)
776  { return 0; }
777 
778 template <typename _Tp, typename _Ap>
779  inline constexpr size_t __size_or_zero_v
780  = __size_or_zero_dispatch<_Tp, _Ap>(0);
781 
782 // }}}
783 // __div_roundup {{{
// Returns __a / __b rounded up to the next integer.
//
// Written as quotient plus remainder check instead of (__a + __b - 1) / __b,
// because the sum wraps around for large __a and would then yield a result
// that is too small. __b must not be 0.
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return __a / __b + (__a % __b != 0 ? 1 : 0); }
787 
788 // }}}
789 // _ExactBool{{{
790 class _ExactBool
791 {
792  const bool _M_data;
793 
794 public:
795  _GLIBCXX_SIMD_INTRINSIC constexpr
796  _ExactBool(bool __b) : _M_data(__b) {}
797 
798  _ExactBool(int) = delete;
799 
800  _GLIBCXX_SIMD_INTRINSIC constexpr
801  operator bool() const
802  { return _M_data; }
803 };
804 
805 // }}}
806 // __may_alias{{{
/**@internal
 * __may_alias<_Tp> is _Tp with the __may_alias__ attribute applied (on
 * compilers that support the attribute). Use it as the pointee type of
 * pointers that are meant to alias objects of other types.
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
814 
815 // }}}
816 // _UnsupportedBase {{{
// Base class of simd and simd_mask for unsupported <_Tp, _Abi>. All of
// default construction, copying and destruction are deleted.
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  ~_UnsupportedBase() = delete;

  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
};
825 
826 // }}}
827 // _InvalidTraits {{{
828 /**
829  * @internal
830  * Defines the implementation of __a given <_Tp, _Abi>.
831  *
832  * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
833  * possible. Static assertions in the type definition do not suffice. It is
834  * important that SFINAE works.
835  */
836 struct _InvalidTraits
837 {
838  using _IsValid = false_type;
839  using _SimdBase = _UnsupportedBase;
840  using _MaskBase = _UnsupportedBase;
841 
842  static constexpr size_t _S_full_size = 0;
843  static constexpr bool _S_is_partial = false;
844 
845  static constexpr size_t _S_simd_align = 1;
846  struct _SimdImpl;
847  struct _SimdMember {};
848  struct _SimdCastType;
849 
850  static constexpr size_t _S_mask_align = 1;
851  struct _MaskImpl;
852  struct _MaskMember {};
853  struct _MaskCastType;
854 };
855 
856 // }}}
857 // _SimdTraits {{{
858 template <typename _Tp, typename _Abi, typename = void_t<>>
859  struct _SimdTraits : _InvalidTraits {};
860 
861 // }}}
862 // __private_init, __bitset_init{{{
/**
 * @internal
 * Tag type and tag object used for the private init constructor of simd and
 * simd_mask
 */
struct _PrivateInit {};
inline constexpr _PrivateInit __private_init = {};

// Second tag type/object, named after bitset initialization.
struct _BitsetInit {};
inline constexpr _BitsetInit __bitset_init = {};
870 
871 // }}}
872 // __is_narrowing_conversion<_From, _To>{{{
873 template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
874  bool = is_arithmetic_v<_To>>
875  struct __is_narrowing_conversion;
876 
877 // ignore "signed/unsigned mismatch" in the following trait.
878 // The implicit conversions will do the right thing here.
879 template <typename _From, typename _To>
880  struct __is_narrowing_conversion<_From, _To, true, true>
881  : public __bool_constant<(
882  __digits_v<_From> > __digits_v<_To>
883  || __finite_max_v<_From> > __finite_max_v<_To>
884  || __finite_min_v<_From> < __finite_min_v<_To>
885  || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
886 
887 template <typename _Tp>
888  struct __is_narrowing_conversion<_Tp, bool, true, true>
889  : public true_type {};
890 
891 template <>
892  struct __is_narrowing_conversion<bool, bool, true, true>
893  : public false_type {};
894 
895 template <typename _Tp>
896  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
897  : public false_type {};
898 
899 template <typename _From, typename _To>
900  struct __is_narrowing_conversion<_From, _To, false, true>
901  : public negation<is_convertible<_From, _To>> {};
902 
903 // }}}
904 // __converts_to_higher_integer_rank{{{
// sizeof(_From) < sizeof(_To): always true.
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : true_type
  {};

// Otherwise: true iff the type of _From + _To is _To.
// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : is_same<decltype(declval<_From>() + declval<_To>()), _To>
  {};
912 
913 // }}}
914 // __data(simd/simd_mask) {{{
915 template <typename _Tp, typename _Ap>
916  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
917  __data(const simd<_Tp, _Ap>& __x);
918 
919 template <typename _Tp, typename _Ap>
920  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
921  __data(simd<_Tp, _Ap>& __x);
922 
923 template <typename _Tp, typename _Ap>
924  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
925  __data(const simd_mask<_Tp, _Ap>& __x);
926 
927 template <typename _Tp, typename _Ap>
928  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
929  __data(simd_mask<_Tp, _Ap>& __x);
930 
931 // }}}
932 // _SimdConverter {{{
933 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
934  typename = void>
935  struct _SimdConverter;
936 
937 template <typename _Tp, typename _Ap>
938  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
939  {
940  template <typename _Up>
941  _GLIBCXX_SIMD_INTRINSIC const _Up&
942  operator()(const _Up& __x)
943  { return __x; }
944  };
945 
946 // }}}
947 // __to_value_type_or_member_type {{{
948 template <typename _V>
949  _GLIBCXX_SIMD_INTRINSIC constexpr auto
950  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
951  { return __data(__x); }
952 
953 template <typename _V>
954  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
955  __to_value_type_or_member_type(const typename _V::value_type& __x)
956  { return __x; }
957 
958 // }}}
959 // __bool_storage_member_type{{{
// Only declared here; specializations are expected to provide a member type
// named type.
template <size_t _Size>
  struct __bool_storage_member_type;

// Shorthand for that member type.
template <size_t _Size>
  using __bool_storage_member_type_t
    = typename __bool_storage_member_type<_Size>::type;
966 
967 // }}}
968 // _SimdTuple {{{
// Why not use tuple?
// 1. tuple gives no guarantee about the storage order, whereas storage
//    equivalent to array<_Tp, _Np> is required here
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;
977 
978 //}}}
979 // __fixed_size_storage_t {{{
// Only declared here; provides the member type named type for _Np elements of
// type _Tp.
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

// Shorthand for that member type.
template <typename _Tp, int _Np>
  using __fixed_size_storage_t
    = typename __fixed_size_storage<_Tp, _Np>::type;
985 
986 // }}}
987 // _SimdWrapper fwd decl{{{
// _SimdWrapper<_Tp, _Size>: only declared here. The third parameter is a hook
// for SFINAE-constrained specializations.
template <typename _Tp, size_t _Size, typename = void>
  struct _SimdWrapper;

// Wrappers with as many _Tp elements as fit into 8, 16, 32 and 64 bytes.
template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;

template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;

template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;

template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
999 
1000 // }}}
1001 // __is_simd_wrapper {{{
1002 template <typename _Tp>
1003  struct __is_simd_wrapper : false_type {};
1004 
1005 template <typename _Tp, size_t _Np>
1006  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
1007 
1008 template <typename _Tp>
1009  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
1010 
1011 // }}}
1012 // _BitOps {{{
1013 struct _BitOps
1014 {
1015  // _S_bit_iteration {{{
1016  template <typename _Tp, typename _Fp>
1017  static void
1018  _S_bit_iteration(_Tp __mask, _Fp&& __f)
1019  {
1020  static_assert(sizeof(_ULLong) >= sizeof(_Tp));
1021  conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
1022  if constexpr (is_convertible_v<_Tp, decltype(__k)>)
1023  __k = __mask;
1024  else
1025  __k = __mask.to_ullong();
1026  while(__k)
1027  {
1028  __f(std::__countr_zero(__k));
1029  __k &= (__k - 1);
1030  }
1031  }
1032 
1033  //}}}
1034 };
1035 
1036 //}}}
1037 // __increment, __decrement {{{
// Function object returning its argument after pre-increment.
// The argument is taken by value, so the caller's object is never modified.
template <typename _Tp = void>
  struct __increment
  {
    constexpr _Tp
    operator()(_Tp __value) const
    { return ++__value; }
  };

// Transparent variant: the operand type is deduced per call.
template <>
  struct __increment<void>
  {
    template <typename _Up>
      constexpr _Up
      operator()(_Up __value) const
      { return ++__value; }
  };
1050 
// Function object returning its argument after pre-decrement.
// The argument is taken by value, so the caller's object is never modified.
template <typename _Tp = void>
  struct __decrement
  {
    constexpr _Tp
    operator()(_Tp __value) const
    { return --__value; }
  };

// Transparent variant: the operand type is deduced per call.
template <>
  struct __decrement<void>
  {
    template <typename _Up>
      constexpr _Up
      operator()(_Up __value) const
      { return --__value; }
  };
1063 
1064 // }}}
1065 // _ValuePreserving(OrInt) {{{
1066 template <typename _From, typename _To,
1067  typename = enable_if_t<negation<
1068  __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
1069  using _ValuePreserving = _From;
1070 
1071 template <typename _From, typename _To,
1072  typename _DecayedFrom = __remove_cvref_t<_From>,
1073  typename = enable_if_t<conjunction<
1074  is_convertible<_From, _To>,
1075  disjunction<
1076  is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
1077  conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
1078  negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
1079  using _ValuePreservingOrInt = _From;
1080 
1081 // }}}
1082 // __intrinsic_type {{{
1083 template <typename _Tp, size_t _Bytes, typename = void_t<>>
1084  struct __intrinsic_type;
1085 
1086 template <typename _Tp, size_t _Size>
1087  using __intrinsic_type_t =
1088  typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
1089 
1090 template <typename _Tp>
1091  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
1092 template <typename _Tp>
1093  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
1094 template <typename _Tp>
1095  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
1096 template <typename _Tp>
1097  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
1098 template <typename _Tp>
1099  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
1100 template <typename _Tp>
1101  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
1102 
1103 // }}}
1104 // _BitMask {{{
1105 template <size_t _Np, bool _Sanitized = false>
1106  struct _BitMask;
1107 
1108 template <size_t _Np, bool _Sanitized>
1109  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
1110 
1111 template <size_t _Np>
1112  using _SanitizedBitMask = _BitMask<_Np, true>;
1113 
1114 template <size_t _Np, bool _Sanitized>
1115  struct _BitMask
1116  {
1117  static_assert(_Np > 0);
1118 
1119  static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
1120 
1121  using _Tp = conditional_t<_Np == 1, bool,
1122  make_unsigned_t<__int_with_sizeof_t<std::min(
1123  sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
1124 
1125  static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
1126 
1127  _Tp _M_bits[_S_array_size];
1128 
1129  static constexpr int _S_unused_bits
1130  = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
1131 
1132  static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1133 
1134  constexpr _BitMask() noexcept = default;
1135 
1136  constexpr _BitMask(unsigned long long __x) noexcept
1137  : _M_bits{static_cast<_Tp>(__x)} {}
1138 
1139  _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1140 
1141  constexpr _BitMask(const _BitMask&) noexcept = default;
1142 
1143  template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1144  && _Sanitized == true>>
1145  constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1146  : _BitMask(__rhs._M_sanitized()) {}
1147 
1148  constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1149  {
1150  static_assert(_S_array_size == 1);
1151  return _M_bits[0];
1152  }
1153 
1154  // precondition: is sanitized
1155  constexpr _Tp
1156  _M_to_bits() const noexcept
1157  {
1158  static_assert(_S_array_size == 1);
1159  return _M_bits[0];
1160  }
1161 
1162  // precondition: is sanitized
1163  constexpr unsigned long long
1164  to_ullong() const noexcept
1165  {
1166  static_assert(_S_array_size == 1);
1167  return _M_bits[0];
1168  }
1169 
1170  // precondition: is sanitized
1171  constexpr unsigned long
1172  to_ulong() const noexcept
1173  {
1174  static_assert(_S_array_size == 1);
1175  return _M_bits[0];
1176  }
1177 
1178  constexpr bitset<_Np>
1179  _M_to_bitset() const noexcept
1180  {
1181  static_assert(_S_array_size == 1);
1182  return _M_bits[0];
1183  }
1184 
1185  constexpr decltype(auto)
1186  _M_sanitized() const noexcept
1187  {
1188  if constexpr (_Sanitized)
1189  return *this;
1190  else if constexpr (_Np == 1)
1191  return _SanitizedBitMask<_Np>(_M_bits[0]);
1192  else
1193  {
1194  _SanitizedBitMask<_Np> __r = {};
1195  for (int __i = 0; __i < _S_array_size; ++__i)
1196  __r._M_bits[__i] = _M_bits[__i];
1197  if constexpr (_S_unused_bits > 0)
1198  __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1199  return __r;
1200  }
1201  }
1202 
1203  template <size_t _Mp, bool _LSanitized>
1204  constexpr _BitMask<_Np + _Mp, _Sanitized>
1205  _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1206  {
1207  constexpr size_t _RN = _Np + _Mp;
1208  using _Rp = _BitMask<_RN, _Sanitized>;
1209  if constexpr (_Rp::_S_array_size == 1)
1210  {
1211  _Rp __r{{_M_bits[0]}};
1212  __r._M_bits[0] <<= _Mp;
1213  __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1214  return __r;
1215  }
1216  else
1217  __assert_unreachable<_Rp>();
1218  }
1219 
1220  // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1221  // significant bits. If the operation implicitly produces a sanitized bitmask,
1222  // the result type will have _Sanitized set.
1223  template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1224  constexpr auto
1225  _M_extract() const noexcept
1226  {
1227  static_assert(_Np > _DropLsb);
1228  static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1229  "not implemented for bitmasks larger than one ullong");
1230  if constexpr (_NewSize == 1)
1231  // must sanitize because the return _Tp is bool
1232  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1233  else
1234  return _BitMask<_NewSize,
1235  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1236  && _NewSize + _DropLsb <= _Np)
1237  || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1238  && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1239  >> _DropLsb);
1240  }
1241 
1242  // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1243  constexpr bool
1244  all() const noexcept
1245  {
1246  if constexpr (_Np == 1)
1247  return _M_bits[0];
1248  else if constexpr (!_Sanitized)
1249  return _M_sanitized().all();
1250  else
1251  {
1252  constexpr _Tp __allbits = ~_Tp();
1253  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1254  if (_M_bits[__i] != __allbits)
1255  return false;
1256  return _M_bits[_S_array_size - 1] == _S_bitmask;
1257  }
1258  }
1259 
1260  // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1261  // false.
1262  constexpr bool
1263  any() const noexcept
1264  {
1265  if constexpr (_Np == 1)
1266  return _M_bits[0];
1267  else if constexpr (!_Sanitized)
1268  return _M_sanitized().any();
1269  else
1270  {
1271  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1272  if (_M_bits[__i] != 0)
1273  return true;
1274  return _M_bits[_S_array_size - 1] != 0;
1275  }
1276  }
1277 
1278  // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1279  constexpr bool
1280  none() const noexcept
1281  {
1282  if constexpr (_Np == 1)
1283  return !_M_bits[0];
1284  else if constexpr (!_Sanitized)
1285  return _M_sanitized().none();
1286  else
1287  {
1288  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1289  if (_M_bits[__i] != 0)
1290  return false;
1291  return _M_bits[_S_array_size - 1] == 0;
1292  }
1293  }
1294 
1295  // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1296  // false.
1297  constexpr int
1298  count() const noexcept
1299  {
1300  if constexpr (_Np == 1)
1301  return _M_bits[0];
1302  else if constexpr (!_Sanitized)
1303  return _M_sanitized().none();
1304  else
1305  {
1306  int __result = __builtin_popcountll(_M_bits[0]);
1307  for (int __i = 1; __i < _S_array_size; ++__i)
1308  __result += __builtin_popcountll(_M_bits[__i]);
1309  return __result;
1310  }
1311  }
1312 
1313  // Returns the bit at offset __i as bool.
1314  constexpr bool
1315  operator[](size_t __i) const noexcept
1316  {
1317  if constexpr (_Np == 1)
1318  return _M_bits[0];
1319  else if constexpr (_S_array_size == 1)
1320  return (_M_bits[0] >> __i) & 1;
1321  else
1322  {
1323  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1324  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1325  return (_M_bits[__j] >> __shift) & 1;
1326  }
1327  }
1328 
1329  template <size_t __i>
1330  constexpr bool
1331  operator[](_SizeConstant<__i>) const noexcept
1332  {
1333  static_assert(__i < _Np);
1334  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1335  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1336  return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1337  }
1338 
1339  // Set the bit at offset __i to __x.
1340  constexpr void
1341  set(size_t __i, bool __x) noexcept
1342  {
1343  if constexpr (_Np == 1)
1344  _M_bits[0] = __x;
1345  else if constexpr (_S_array_size == 1)
1346  {
1347  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1348  _M_bits[0] |= _Tp(_Tp(__x) << __i);
1349  }
1350  else
1351  {
1352  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1353  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1354  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1355  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1356  }
1357  }
1358 
1359  template <size_t __i>
1360  constexpr void
1361  set(_SizeConstant<__i>, bool __x) noexcept
1362  {
1363  static_assert(__i < _Np);
1364  if constexpr (_Np == 1)
1365  _M_bits[0] = __x;
1366  else
1367  {
1368  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1369  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1370  constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1371  _M_bits[__j] &= __mask;
1372  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1373  }
1374  }
1375 
1376  // Inverts all bits. Sanitized input leads to sanitized output.
1377  constexpr _BitMask
1378  operator~() const noexcept
1379  {
1380  if constexpr (_Np == 1)
1381  return !_M_bits[0];
1382  else
1383  {
1384  _BitMask __result{};
1385  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1386  __result._M_bits[__i] = ~_M_bits[__i];
1387  if constexpr (_Sanitized)
1388  __result._M_bits[_S_array_size - 1]
1389  = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1390  else
1391  __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1392  return __result;
1393  }
1394  }
1395 
1396  constexpr _BitMask&
1397  operator^=(const _BitMask& __b) & noexcept
1398  {
1399  __execute_n_times<_S_array_size>(
1400  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
1401  return *this;
1402  }
1403 
1404  constexpr _BitMask&
1405  operator|=(const _BitMask& __b) & noexcept
1406  {
1407  __execute_n_times<_S_array_size>(
1408  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
1409  return *this;
1410  }
1411 
1412  constexpr _BitMask&
1413  operator&=(const _BitMask& __b) & noexcept
1414  {
1415  __execute_n_times<_S_array_size>(
1416  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
1417  return *this;
1418  }
1419 
1420  friend constexpr _BitMask
1421  operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1422  {
1423  _BitMask __r = __a;
1424  __r ^= __b;
1425  return __r;
1426  }
1427 
1428  friend constexpr _BitMask
1429  operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1430  {
1431  _BitMask __r = __a;
1432  __r |= __b;
1433  return __r;
1434  }
1435 
1436  friend constexpr _BitMask
1437  operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1438  {
1439  _BitMask __r = __a;
1440  __r &= __b;
1441  return __r;
1442  }
1443 
1444  _GLIBCXX_SIMD_INTRINSIC
1445  constexpr bool
1446  _M_is_constprop() const
1447  {
1448  if constexpr (_S_array_size == 0)
1449  return __builtin_constant_p(_M_bits[0]);
1450  else
1451  {
1452  for (int __i = 0; __i < _S_array_size; ++__i)
1453  if (!__builtin_constant_p(_M_bits[__i]))
1454  return false;
1455  return true;
1456  }
1457  }
1458  };
1459 
1460 // }}}
1461 
1462 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1463 // __min_vector_size {{{
// Per element type: two elements' worth of bytes.
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

// Without an element type: 8 bytes when _GLIBCXX_SIMD_HAVE_NEON is set,
// 16 bytes otherwise.
template <>
  inline constexpr int __min_vector_size<void> =
#if _GLIBCXX_SIMD_HAVE_NEON
    8;
#else
    16;
#endif
1474 
1475 // }}}
1476 // __vector_type {{{
1477 template <typename _Tp, size_t _Np, typename = void>
1478  struct __vector_type_n {};
1479 
1480 // substition failure for 0-element case
1481 template <typename _Tp>
1482  struct __vector_type_n<_Tp, 0, void> {};
1483 
1484 // special case 1-element to be _Tp itself
1485 template <typename _Tp>
1486  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
1487  { using type = _Tp; };
1488 
1489 // else, use GNU-style builtin vector types
1490 template <typename _Tp, size_t _Np>
1491  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
1492  {
1493  static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
1494 
1495  static constexpr size_t _S_Bytes =
1496 #ifdef __i386__
1497  // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
1498  // those objects are passed via MMX registers and nothing ever calls EMMS.
1499  _S_Np2 == 8 ? 16 :
1500 #endif
1501  _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
1502  : _S_Np2;
1503 
1504  using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
1505  };
1506 
1507 template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
1508  struct __vector_type;
1509 
1510 template <typename _Tp, size_t _Bytes>
1511  struct __vector_type<_Tp, _Bytes, 0>
1512  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
1513 
1514 template <typename _Tp, size_t _Size>
1515  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
1516 
1517 template <typename _Tp>
1518  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
1519 template <typename _Tp>
1520  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
1521 template <typename _Tp>
1522  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
1523 template <typename _Tp>
1524  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
1525 template <typename _Tp>
1526  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
1527 template <typename _Tp>
1528  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
1529 
1530 // }}}
1531 // __is_vector_type {{{
1532 template <typename _Tp, typename = void_t<>>
1533  struct __is_vector_type : false_type {};
1534 
1535 template <typename _Tp>
1536  struct __is_vector_type<
1537  _Tp, void_t<typename __vector_type<
1538  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1539  : is_same<_Tp, typename __vector_type<
1540  remove_reference_t<decltype(declval<_Tp>()[0])>,
1541  sizeof(_Tp)>::type> {};
1542 
1543 template <typename _Tp>
1544  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1545 
1546 // }}}
1547 // __is_intrinsic_type {{{
1548 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
1549 template <typename _Tp>
1550  using __is_intrinsic_type = __is_vector_type<_Tp>;
1551 #else // not SSE (x86)
1552 template <typename _Tp, typename = void_t<>>
1553  struct __is_intrinsic_type : false_type {};
1554 
1555 template <typename _Tp>
1556  struct __is_intrinsic_type<
1557  _Tp, void_t<typename __intrinsic_type<
1558  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1559  : is_same<_Tp, typename __intrinsic_type<
1560  remove_reference_t<decltype(declval<_Tp>()[0])>,
1561  sizeof(_Tp)>::type> {};
1562 #endif
1563 
1564 template <typename _Tp>
1565  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1566 
1567 // }}}
1568 // _VectorTraits{{{
1569 template <typename _Tp, typename = void_t<>>
1570  struct _VectorTraitsImpl;
1571 
1572 template <typename _Tp>
1573  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
1574  || __is_intrinsic_type_v<_Tp>>>
1575  {
1576  using type = _Tp;
1577  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1578  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1579  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1580  template <typename _Up, int _W = _S_full_size>
1581  static constexpr bool _S_is
1582  = is_same_v<value_type, _Up> && _W == _S_full_size;
1583  };
1584 
1585 template <typename _Tp, size_t _Np>
1586  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1587  void_t<__vector_type_t<_Tp, _Np>>>
1588  {
1589  using type = __vector_type_t<_Tp, _Np>;
1590  using value_type = _Tp;
1591  static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1592  using _Wrapper = _SimdWrapper<_Tp, _Np>;
1593  static constexpr bool _S_is_partial = (_Np == _S_full_size);
1594  static constexpr int _S_partial_width = _Np;
1595  template <typename _Up, int _W = _S_full_size>
1596  static constexpr bool _S_is
1597  = is_same_v<value_type, _Up>&& _W == _S_full_size;
1598  };
1599 
1600 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1601  using _VectorTraits = _VectorTraitsImpl<_Tp>;
1602 
1603 // }}}
1604 // __as_vector{{{
1605 template <typename _V>
1606  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1607  __as_vector(_V __x)
1608  {
1609  if constexpr (__is_vector_type_v<_V>)
1610  return __x;
1611  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1612  {
1613  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
1614  {
1615  static_assert(is_simd<_V>::value);
1616  static_assert(_V::abi_type::template __traits<
1617  typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
1618  return __as_vector(__data(__x).first);
1619  }
1620  else if constexpr (_V::size() > 1)
1621  return __data(__x)._M_data;
1622  else
1623  {
1624  static_assert(is_simd<_V>::value);
1625  using _Tp = typename _V::value_type;
1626 #ifdef __i386__
1627  constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
1628  using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
1629 #else
1630  using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
1631 #endif
1632  return _RV{__data(__x)};
1633  }
1634  }
1635  else if constexpr (__is_vectorizable_v<_V>)
1636  return __vector_type_t<_V, 2>{__x};
1637  else
1638  return __x._M_data;
1639  }
1640 
1641 // }}}
1642 // __as_wrapper{{{
1643 template <size_t _Np = 0, typename _V>
1644  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1645  __as_wrapper(_V __x)
1646  {
1647  if constexpr (__is_vector_type_v<_V>)
1648  return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1649  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1650  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1651  {
1652  static_assert(_V::size() == _Np);
1653  return __data(__x);
1654  }
1655  else
1656  {
1657  static_assert(_V::_S_size == _Np);
1658  return __x;
1659  }
1660  }
1661 
1662 // }}}
1663 // __intrin_bitcast{{{
1664 template <typename _To, typename _From>
1665  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1666  __intrin_bitcast(_From __v)
1667  {
1668  static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1669  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1670  if constexpr (sizeof(_To) == sizeof(_From))
1671  return reinterpret_cast<_To>(__v);
1672  else if constexpr (sizeof(_From) > sizeof(_To))
1673  if constexpr (sizeof(_To) >= 16)
1674  return reinterpret_cast<const __may_alias<_To>&>(__v);
1675  else
1676  {
1677  _To __r;
1678  __builtin_memcpy(&__r, &__v, sizeof(_To));
1679  return __r;
1680  }
1681 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1682  else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1683  return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1684  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1685  else if constexpr (__have_avx512f && sizeof(_From) == 16
1686  && sizeof(_To) == 64)
1687  return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1688  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1689  else if constexpr (__have_avx512f && sizeof(_From) == 32
1690  && sizeof(_To) == 64)
1691  return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1692  reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1693 #endif // _GLIBCXX_SIMD_X86INTRIN
1694  else if constexpr (sizeof(__v) <= 8)
1695  return reinterpret_cast<_To>(
1696  __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1697  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1698  else
1699  {
1700  static_assert(sizeof(_To) > sizeof(_From));
1701  _To __r = {};
1702  __builtin_memcpy(&__r, &__v, sizeof(_From));
1703  return __r;
1704  }
1705  }
1706 
1707 // }}}
1708 // __vector_bitcast{{{
1709 template <typename _To, size_t _NN = 0, typename _From,
1710  typename _FromVT = _VectorTraits<_From>,
1711  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1712  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1713  __vector_bitcast(_From __x)
1714  {
1715  using _R = __vector_type_t<_To, _Np>;
1716  return __intrin_bitcast<_R>(__x);
1717  }
1718 
1719 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1720  size_t _Np
1721  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1722  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1723  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1724  {
1725  static_assert(_Np > 1);
1726  return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1727  }
1728 
1729 // }}}
1730 // __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
// Declarations only: conversion of 1, 2, 4, 8 or 16 vectors of type _Tp into
// a single _To. _TVT restricts the overloads to types that _VectorTraits
// accepts.
template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To
  __convert_x86(_Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To
  __convert_x86(_Tp, _Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To
  __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To
  __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To
  __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1748 
1749 //}}}
1750 // __bit_cast {{{
1751 template <typename _To, typename _From>
1752  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1753  __bit_cast(const _From __x)
1754  {
1755 #if __has_builtin(__builtin_bit_cast)
1756  return __builtin_bit_cast(_To, __x);
1757 #else
1758  static_assert(sizeof(_To) == sizeof(_From));
1759  constexpr bool __to_is_vectorizable
1760  = is_arithmetic_v<_To> || is_enum_v<_To>;
1761  constexpr bool __from_is_vectorizable
1762  = is_arithmetic_v<_From> || is_enum_v<_From>;
1763  if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1764  return reinterpret_cast<_To>(__x);
1765  else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1766  {
1767  using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1768  return reinterpret_cast<_To>(_FV{__x});
1769  }
1770  else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1771  {
1772  using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1773  using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1774  return reinterpret_cast<_TV>(_FV{__x})[0];
1775  }
1776  else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1777  {
1778  using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1779  return reinterpret_cast<_TV>(__x)[0];
1780  }
1781  else
1782  {
1783  _To __r;
1784  __builtin_memcpy(reinterpret_cast<char*>(&__r),
1785  reinterpret_cast<const char*>(&__x), sizeof(_To));
1786  return __r;
1787  }
1788 #endif
1789  }
1790 
1791 // }}}
1792 // __to_intrin {{{
1793 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1794  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1795  _GLIBCXX_SIMD_INTRINSIC constexpr _R
1796  __to_intrin(_Tp __x)
1797  {
1798  static_assert(sizeof(__x) <= sizeof(_R),
1799  "__to_intrin may never drop values off the end");
1800  if constexpr (sizeof(__x) == sizeof(_R))
1801  return reinterpret_cast<_R>(__as_vector(__x));
1802  else
1803  {
1804  using _Up = __int_for_sizeof_t<_Tp>;
1805  return reinterpret_cast<_R>(
1806  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1807  }
1808  }
1809 
1810 // }}}
1811 // __make_vector{{{
1812 template <typename _Tp, typename... _Args>
1813  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1814  __make_vector(const _Args&... __args)
1815  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1816 
1817 // }}}
1818 // __vector_broadcast{{{
1819 template <size_t _Np, typename _Tp, size_t... _I>
1820  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1821  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1822  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1823 
1824 template <size_t _Np, typename _Tp>
1825  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1826  __vector_broadcast(_Tp __x)
1827  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1828 
1829 // }}}
1830 // __generate_vector{{{
1831  template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1832  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1833  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1834  { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1835 
1836 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1837  _GLIBCXX_SIMD_INTRINSIC constexpr _V
1838  __generate_vector(_Gp&& __gen)
1839  {
1840  if constexpr (__is_vector_type_v<_V>)
1841  return __generate_vector_impl<typename _VVT::value_type,
1842  _VVT::_S_full_size>(
1843  static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1844  else
1845  return __generate_vector_impl<typename _VVT::value_type,
1846  _VVT::_S_partial_width>(
1847  static_cast<_Gp&&>(__gen),
1848  make_index_sequence<_VVT::_S_partial_width>());
1849  }
1850 
1851 template <typename _Tp, size_t _Np, typename _Gp>
1852  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1853  __generate_vector(_Gp&& __gen)
1854  {
1855  return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1856  make_index_sequence<_Np>());
1857  }
1858 
1859 // }}}
1860 // __xor{{{
1861 template <typename _TW>
1862  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1863  __xor(_TW __a, _TW __b) noexcept
1864  {
1865  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1866  {
1867  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1868  _VectorTraitsImpl<_TW>>::value_type;
1869  if constexpr (is_floating_point_v<_Tp>)
1870  {
1871  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1872  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1873  ^ __vector_bitcast<_Ip>(__b));
1874  }
1875  else if constexpr (__is_vector_type_v<_TW>)
1876  return __a ^ __b;
1877  else
1878  return __a._M_data ^ __b._M_data;
1879  }
1880  else
1881  return __a ^ __b;
1882  }
1883 
1884 // }}}
1885 // __or{{{
1886 template <typename _TW>
1887  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1888  __or(_TW __a, _TW __b) noexcept
1889  {
1890  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1891  {
1892  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1893  _VectorTraitsImpl<_TW>>::value_type;
1894  if constexpr (is_floating_point_v<_Tp>)
1895  {
1896  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1897  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1898  | __vector_bitcast<_Ip>(__b));
1899  }
1900  else if constexpr (__is_vector_type_v<_TW>)
1901  return __a | __b;
1902  else
1903  return __a._M_data | __b._M_data;
1904  }
1905  else
1906  return __a | __b;
1907  }
1908 
1909 // }}}
1910 // __and{{{
1911 template <typename _TW>
1912  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1913  __and(_TW __a, _TW __b) noexcept
1914  {
1915  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1916  {
1917  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1918  _VectorTraitsImpl<_TW>>::value_type;
1919  if constexpr (is_floating_point_v<_Tp>)
1920  {
1921  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1922  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1923  & __vector_bitcast<_Ip>(__b));
1924  }
1925  else if constexpr (__is_vector_type_v<_TW>)
1926  return __a & __b;
1927  else
1928  return __a._M_data & __b._M_data;
1929  }
1930  else
1931  return __a & __b;
1932  }
1933 
1934 // }}}
1935 // __andnot{{{
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
// Overload set that dispatches to the x86 and-not builtin/intrinsic matching
// the argument's vector type (128-, 256- and 512-bit float, double and 64-bit
// integer vectors).
// NOTE(review): presumably computes (~__a) & __b, following the x86 ANDN*
// instruction semantics -- confirm against the intrinsics reference.
static constexpr struct
{
  // 128-bit vectors
  _GLIBCXX_SIMD_INTRINSIC __v4sf
  operator()(__v4sf __a, __v4sf __b) const noexcept
  { return __builtin_ia32_andnps(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2df
  operator()(__v2df __a, __v2df __b) const noexcept
  { return __builtin_ia32_andnpd(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2di
  operator()(__v2di __a, __v2di __b) const noexcept
  { return __builtin_ia32_pandn128(__a, __b); }

  // 256-bit vectors
  _GLIBCXX_SIMD_INTRINSIC __v8sf
  operator()(__v8sf __a, __v8sf __b) const noexcept
  { return __builtin_ia32_andnps256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4df
  operator()(__v4df __a, __v4df __b) const noexcept
  { return __builtin_ia32_andnpd256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4di
  operator()(__v4di __a, __v4di __b) const noexcept
  {
    if constexpr (__have_avx2)
      return __builtin_ia32_andnotsi256(__a, __b);
    else
      {
        // without AVX2 the operation is routed through the double variant
        const __v4df __fa = reinterpret_cast<__v4df>(__a);
        const __v4df __fb = reinterpret_cast<__v4df>(__b);
        return reinterpret_cast<__v4di>(__builtin_ia32_andnpd256(__fa, __fb));
      }
  }

  // 512-bit vectors
  _GLIBCXX_SIMD_INTRINSIC __v16sf
  operator()(__v16sf __a, __v16sf __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_ps(__a, __b);
    else
      // without AVX512DQ the operation is routed through the integer variant
      return reinterpret_cast<__v16sf>(
               _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
                                   reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8df
  operator()(__v8df __a, __v8df __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_pd(__a, __b);
    else
      // without AVX512DQ the operation is routed through the integer variant
      return reinterpret_cast<__v8df>(
               _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
                                   reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8di
  operator()(__v8di __a, __v8di __b) const noexcept
  { return _mm512_andnot_si512(__a, __b); }
} _S_x86_andnot;
#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1997 
1998 template <typename _TW>
1999  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
2000  __andnot(_TW __a, _TW __b) noexcept
2001  {
2002  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
2003  {
2004  using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
2005  _VectorTraitsImpl<_TW>>;
2006  using _Tp = typename _TVT::value_type;
2007 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
2008  if constexpr (sizeof(_TW) >= 16)
2009  {
2010  const auto __ai = __to_intrin(__a);
2011  const auto __bi = __to_intrin(__b);
2012  if (!__builtin_is_constant_evaluated()
2013  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
2014  {
2015  const auto __r = _S_x86_andnot(__ai, __bi);
2016  if constexpr (is_convertible_v<decltype(__r), _TW>)
2017  return __r;
2018  else
2019  return reinterpret_cast<typename _TVT::type>(__r);
2020  }
2021  }
2022 #endif // _GLIBCXX_SIMD_X86INTRIN
2023  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2024  return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2025  & __vector_bitcast<_Ip>(__b));
2026  }
2027  else
2028  return ~__a & __b;
2029  }
2030 
2031 // }}}
2032 // __not{{{
2033 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2034  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2035  __not(_Tp __a) noexcept
2036  {
2037  if constexpr (is_floating_point_v<typename _TVT::value_type>)
2038  return reinterpret_cast<typename _TVT::type>(
2039  ~__vector_bitcast<unsigned>(__a));
2040  else
2041  return ~__a;
2042  }
2043 
2044 // }}}
2045 // __vec_shuffle{{{
2046 template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
2047  _GLIBCXX_SIMD_INTRINSIC constexpr
2048  __vector_type_t<remove_reference_t<decltype(declval<_T0>()[0])>, sizeof...(_Is)>
2049  __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
2050  {
2051  constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
2052  constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
2053  using _Tp = remove_reference_t<decltype(declval<_T0>()[0])>;
2054  using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
2055 #if __has_builtin(__builtin_shufflevector)
2056 #ifdef __clang__
2057  // Clang requires _T0 == _T1
2058  if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
2059  return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
2060  else if constexpr (sizeof(__x) > sizeof(__y))
2061  return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
2062  else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
2063  return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
2064  __i = __idx_perm(__i);
2065  return __i < _N0 ? __i : __i - _N0 + _N1;
2066  });
2067  else if constexpr (sizeof(__x) < sizeof(__y))
2068  return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
2069  __i = __idx_perm(__i);
2070  return __i < _N0 ? __i : __i - _N0 + _N1;
2071  });
2072  else
2073 #endif
2074  {
2075  const auto __r = __builtin_shufflevector(__x, __y, [=] {
2076  constexpr int __j = __idx_perm(_Is);
2077  static_assert(__j < _N0 + _N1);
2078  return __j;
2079  }()...);
2080 #ifdef __i386__
2081  if constexpr (sizeof(__r) == sizeof(_RV))
2082  return __r;
2083  else
2084  return _RV {__r[_Is]...};
2085 #else
2086  return __r;
2087 #endif
2088  }
2089 #else
2090  return _RV {
2091  [=]() -> _Tp {
2092  constexpr int __j = __idx_perm(_Is);
2093  static_assert(__j < _N0 + _N1);
2094  if constexpr (__j < 0)
2095  return 0;
2096  else if constexpr (__j < _N0)
2097  return __x[__j];
2098  else
2099  return __y[__j - _N0];
2100  }()...
2101  };
2102 #endif
2103  }
2104 
2105 template <typename _T0, typename _Fun, typename _Seq>
2106  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2107  __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
2108  { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
2109 
2110 // }}}
2111 // __concat{{{
2112 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2113  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2114  constexpr _R
2115  __concat(_Tp a_, _Tp b_)
2116  {
2117 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2118  using _W
2119  = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2120  conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2121  long long, typename _TVT::value_type>>;
2122  constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2123  const auto __a = __vector_bitcast<_W>(a_);
2124  const auto __b = __vector_bitcast<_W>(b_);
2125  using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2126 #else
2127  constexpr int input_width = _TVT::_S_full_size;
2128  const _Tp& __a = a_;
2129  const _Tp& __b = b_;
2130  using _Up = _R;
2131 #endif
2132  if constexpr (input_width == 2)
2133  return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2134  else if constexpr (input_width == 4)
2135  return reinterpret_cast<_R>(
2136  _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2137  else if constexpr (input_width == 8)
2138  return reinterpret_cast<_R>(
2139  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2140  __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2141  else if constexpr (input_width == 16)
2142  return reinterpret_cast<_R>(
2143  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2144  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2145  __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2146  __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2147  __b[12], __b[13], __b[14], __b[15]});
2148  else if constexpr (input_width == 32)
2149  return reinterpret_cast<_R>(
2150  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2151  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2152  __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2153  __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2154  __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2155  __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2156  __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2157  __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2158  __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2159  __b[31]});
2160  }
2161 
2162 // }}}
2163 // __zero_extend {{{
2164 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2165  struct _ZeroExtendProxy
2166  {
2167  using value_type = typename _TVT::value_type;
2168  static constexpr size_t _Np = _TVT::_S_full_size;
2169  const _Tp __x;
2170 
2171  template <typename _To, typename _ToVT = _VectorTraits<_To>,
2172  typename
2173  = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2174  _GLIBCXX_SIMD_INTRINSIC operator _To() const
2175  {
2176  constexpr size_t _ToN = _ToVT::_S_full_size;
2177  if constexpr (_ToN == _Np)
2178  return __x;
2179  else if constexpr (_ToN == 2 * _Np)
2180  {
2181 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2182  if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2183  return __vector_bitcast<value_type>(
2184  _mm256_insertf128_ps(__m256(), __x, 0));
2185  else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2186  return __vector_bitcast<value_type>(
2187  _mm256_insertf128_pd(__m256d(), __x, 0));
2188  else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2189  return __vector_bitcast<value_type>(
2190  _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2191  else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2192  {
2193  if constexpr (__have_avx512dq)
2194  return __vector_bitcast<value_type>(
2195  _mm512_insertf32x8(__m512(), __x, 0));
2196  else
2197  return reinterpret_cast<__m512>(
2198  _mm512_insertf64x4(__m512d(),
2199  reinterpret_cast<__m256d>(__x), 0));
2200  }
2201  else if constexpr (__have_avx512f
2202  && _TVT::template _S_is<double, 4>)
2203  return __vector_bitcast<value_type>(
2204  _mm512_insertf64x4(__m512d(), __x, 0));
2205  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2206  return __vector_bitcast<value_type>(
2207  _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2208 #endif
2209  return __concat(__x, _Tp());
2210  }
2211  else if constexpr (_ToN == 4 * _Np)
2212  {
2213 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2214  if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2215  {
2216  return __vector_bitcast<value_type>(
2217  _mm512_insertf64x2(__m512d(), __x, 0));
2218  }
2219  else if constexpr (__have_avx512f
2220  && is_floating_point_v<value_type>)
2221  {
2222  return __vector_bitcast<value_type>(
2223  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2224  0));
2225  }
2226  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2227  {
2228  return __vector_bitcast<value_type>(
2229  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2230  }
2231 #endif
2232  return __concat(__concat(__x, _Tp()),
2233  __vector_type_t<value_type, _Np * 2>());
2234  }
2235  else if constexpr (_ToN == 8 * _Np)
2236  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2237  __vector_type_t<value_type, _Np * 4>());
2238  else if constexpr (_ToN == 16 * _Np)
2239  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2240  __vector_type_t<value_type, _Np * 8>());
2241  else
2242  __assert_unreachable<_Tp>();
2243  }
2244  };
2245 
2246 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2247  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2248  __zero_extend(_Tp __x)
2249  { return {__x}; }
2250 
2251 // }}}
2252 // __extract<_Np, By>{{{
2253 template <int _Offset,
2254  int _SplitBy,
2255  typename _Tp,
2256  typename _TVT = _VectorTraits<_Tp>,
2257  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2258  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2259  __extract(_Tp __in)
2260  {
2261  using value_type = typename _TVT::value_type;
2262 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2263  if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2264  {
2265  if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2266  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2267  else if constexpr (is_floating_point_v<value_type>)
2268  return __vector_bitcast<value_type>(
2269  _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2270  else
2271  return reinterpret_cast<_R>(
2272  _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2273  _Offset));
2274  }
2275  else
2276 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2277  {
2278 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2279  using _W = conditional_t<
2280  is_floating_point_v<value_type>, double,
2281  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2282  static_assert(sizeof(_R) % sizeof(_W) == 0);
2283  constexpr int __return_width = sizeof(_R) / sizeof(_W);
2284  using _Up = __vector_type_t<_W, __return_width>;
2285  const auto __x = __vector_bitcast<_W>(__in);
2286 #else
2287  constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2288  using _Up = _R;
2289  const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2290  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2291 #endif
2292  constexpr int _O = _Offset * __return_width;
2293  return __call_with_subscripts<__return_width, _O>(
2294  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2295  return reinterpret_cast<_R>(_Up{__entries...});
2296  });
2297  }
2298  }
2299 
2300 // }}}
2301 // __lo/__hi64[z]{{{
2302 template <typename _Tp,
2303  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2304  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2305  __lo64(_Tp __x)
2306  {
2307  _R __r{};
2308  __builtin_memcpy(&__r, &__x, 8);
2309  return __r;
2310  }
2311 
2312 template <typename _Tp,
2313  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2314  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2315  __hi64(_Tp __x)
2316  {
2317  static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2318  _R __r{};
2319  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2320  return __r;
2321  }
2322 
2323 template <typename _Tp,
2324  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2325  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2326  __hi64z([[maybe_unused]] _Tp __x)
2327  {
2328  _R __r{};
2329  if constexpr (sizeof(_Tp) == 16)
2330  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2331  return __r;
2332  }
2333 
2334 // }}}
2335 // __lo/__hi128{{{
2336 template <typename _Tp>
2337  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2338  __lo128(_Tp __x)
2339  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2340 
2341 template <typename _Tp>
2342  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2343  __hi128(_Tp __x)
2344  {
2345  static_assert(sizeof(__x) == 32);
2346  return __extract<1, 2>(__x);
2347  }
2348 
2349 // }}}
2350 // __lo/__hi256{{{
2351 template <typename _Tp>
2352  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2353  __lo256(_Tp __x)
2354  {
2355  static_assert(sizeof(__x) == 64);
2356  return __extract<0, 2>(__x);
2357  }
2358 
2359 template <typename _Tp>
2360  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2361  __hi256(_Tp __x)
2362  {
2363  static_assert(sizeof(__x) == 64);
2364  return __extract<1, 2>(__x);
2365  }
2366 
2367 // }}}
2368 // __auto_bitcast{{{
2369 template <typename _Tp>
2370  struct _AutoCast
2371  {
2372  static_assert(__is_vector_type_v<_Tp>);
2373 
2374  const _Tp __x;
2375 
2376  template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2377  _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2378  { return __intrin_bitcast<typename _UVT::type>(__x); }
2379  };
2380 
2381 template <typename _Tp>
2382  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2383  __auto_bitcast(const _Tp& __x)
2384  { return {__x}; }
2385 
2386 template <typename _Tp, size_t _Np>
2387  _GLIBCXX_SIMD_INTRINSIC constexpr
2388  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2389  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2390  { return {__x._M_data}; }
2391 
2392 // }}}
2393 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2394 
2395 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2396 // __bool_storage_member_type{{{
2397 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2398 template <size_t _Size>
2399  struct __bool_storage_member_type
2400  {
2401  static_assert((_Size & (_Size - 1)) != 0,
2402  "This trait may only be used for non-power-of-2 sizes. "
2403  "Power-of-2 sizes must be specialized.");
2404  using type =
2405  typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2406  };
2407 
2408 template <>
2409  struct __bool_storage_member_type<1> { using type = bool; };
2410 
2411 template <>
2412  struct __bool_storage_member_type<2> { using type = __mmask8; };
2413 
2414 template <>
2415  struct __bool_storage_member_type<4> { using type = __mmask8; };
2416 
2417 template <>
2418  struct __bool_storage_member_type<8> { using type = __mmask8; };
2419 
2420 template <>
2421  struct __bool_storage_member_type<16> { using type = __mmask16; };
2422 
2423 template <>
2424  struct __bool_storage_member_type<32> { using type = __mmask32; };
2425 
2426 template <>
2427  struct __bool_storage_member_type<64> { using type = __mmask64; };
2428 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
2429 
2430 // }}}
2431 // __intrinsic_type (x86){{{
2432 // the following excludes bool via __is_vectorizable
2433 #if _GLIBCXX_SIMD_HAVE_SSE
2434 template <typename _Tp, size_t _Bytes>
2435  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2436  {
2437  static_assert(!is_same_v<_Tp, long double>,
2438  "no __intrinsic_type support for long double on x86");
2439 
2440  static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2441 
2442  using type [[__gnu__::__vector_size__(_S_VBytes)]]
2443  = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2444  };
2445 #endif // _GLIBCXX_SIMD_HAVE_SSE
2446 
2447 // }}}
2448 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2449 // __intrinsic_type (ARM){{{
2450 #if _GLIBCXX_SIMD_HAVE_NEON
2451 template <>
2452  struct __intrinsic_type<float, 8, void>
2453  { using type = float32x2_t; };
2454 
2455 template <>
2456  struct __intrinsic_type<float, 16, void>
2457  { using type = float32x4_t; };
2458 
2459 template <>
2460  struct __intrinsic_type<double, 8, void>
2461  {
2462 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2463  using type = float64x1_t;
2464 #endif
2465  };
2466 
2467 template <>
2468  struct __intrinsic_type<double, 16, void>
2469  {
2470 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2471  using type = float64x2_t;
2472 #endif
2473  };
2474 
2475 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2476 template <> \
2477  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2478  _Np * _Bits / 8, void> \
2479  { using type = int##_Bits##x##_Np##_t; }; \
2480 template <> \
2481  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2482  _Np * _Bits / 8, void> \
2483  { using type = uint##_Bits##x##_Np##_t; }
2484 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2485 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2486 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2487 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2488 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2489 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2490 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2491 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2492 #undef _GLIBCXX_SIMD_ARM_INTRIN
2493 
2494 template <typename _Tp, size_t _Bytes>
2495  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2496  {
2497  static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2498 
2499  using _Ip = __int_for_sizeof_t<_Tp>;
2500 
2501  using _Up = conditional_t<
2502  is_floating_point_v<_Tp>, _Tp,
2503  conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2504 
2505  static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2506  "should use explicit specialization above");
2507 
2508  using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2509  };
2510 #endif // _GLIBCXX_SIMD_HAVE_NEON
2511 
2512 // }}}
2513 // __intrinsic_type (PPC){{{
2514 #ifdef __ALTIVEC__
2515 template <typename _Tp>
2516  struct __intrinsic_type_impl;
2517 
2518 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2519  template <> \
2520  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2521 _GLIBCXX_SIMD_PPC_INTRIN(float);
2522 #ifdef __VSX__
2523 _GLIBCXX_SIMD_PPC_INTRIN(double);
2524 #endif
2525 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2526 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2527 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2528 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2529 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2530 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2531 #if defined __VSX__ || __SIZEOF_LONG__ == 4
2532 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2533 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2534 #endif
2535 #ifdef __VSX__
2536 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2537 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2538 #endif
2539 #undef _GLIBCXX_SIMD_PPC_INTRIN
2540 
2541 template <typename _Tp, size_t _Bytes>
2542  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2543  {
2544  static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2545 
2546  // allow _Tp == long double with -mlong-double-64
2547  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2548  "no __intrinsic_type support for 128-bit floating point on PowerPC");
2549 
2550 #ifndef __VSX__
2551  static_assert(!(is_same_v<_Tp, double>
2552  || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2553  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2554 #endif
2555 
2556  static constexpr auto __element_type()
2557  {
2558  if constexpr (is_floating_point_v<_Tp>)
2559  {
2560  if constexpr (_S_is_ldouble)
2561  return double {};
2562  else
2563  return _Tp {};
2564  }
2565  else if constexpr (is_signed_v<_Tp>)
2566  {
2567  if constexpr (sizeof(_Tp) == sizeof(_SChar))
2568  return _SChar {};
2569  else if constexpr (sizeof(_Tp) == sizeof(short))
2570  return short {};
2571  else if constexpr (sizeof(_Tp) == sizeof(int))
2572  return int {};
2573  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2574  return _LLong {};
2575  }
2576  else
2577  {
2578  if constexpr (sizeof(_Tp) == sizeof(_UChar))
2579  return _UChar {};
2580  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2581  return _UShort {};
2582  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2583  return _UInt {};
2584  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2585  return _ULLong {};
2586  }
2587  }
2588 
2589  using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2590  };
2591 #endif // __ALTIVEC__
2592 
2593 // }}}
2594 // _SimdWrapper<bool>{{{1
2595 template <size_t _Width>
2596  struct _SimdWrapper<bool, _Width,
2597  void_t<typename __bool_storage_member_type<_Width>::type>>
2598  {
2599  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2600  using value_type = bool;
2601 
2602  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2603 
2604  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2605  __as_full_vector() const
2606  { return _M_data; }
2607 
2608  _GLIBCXX_SIMD_INTRINSIC constexpr
2609  _SimdWrapper() = default;
2610 
2611  _GLIBCXX_SIMD_INTRINSIC constexpr
2612  _SimdWrapper(_BuiltinType __k) : _M_data(__k) {};
2613 
2614  _GLIBCXX_SIMD_INTRINSIC
2615  operator const _BuiltinType&() const
2616  { return _M_data; }
2617 
2618  _GLIBCXX_SIMD_INTRINSIC
2619  operator _BuiltinType&()
2620  { return _M_data; }
2621 
2622  _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2623  __intrin() const
2624  { return _M_data; }
2625 
2626  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2627  operator[](size_t __i) const
2628  { return _M_data & (_BuiltinType(1) << __i); }
2629 
2630  template <size_t __i>
2631  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2632  operator[](_SizeConstant<__i>) const
2633  { return _M_data & (_BuiltinType(1) << __i); }
2634 
2635  _GLIBCXX_SIMD_INTRINSIC constexpr void
2636  _M_set(size_t __i, value_type __x)
2637  {
2638  if (__x)
2639  _M_data |= (_BuiltinType(1) << __i);
2640  else
2641  _M_data &= ~(_BuiltinType(1) << __i);
2642  }
2643 
2644  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2645  _M_is_constprop() const
2646  { return __builtin_constant_p(_M_data); }
2647 
2648  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2649  _M_is_constprop_none_of() const
2650  {
2651  if (__builtin_constant_p(_M_data))
2652  {
2653  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2654  constexpr _BuiltinType __active_mask
2655  = ~_BuiltinType() >> (__nbits - _Width);
2656  return (_M_data & __active_mask) == 0;
2657  }
2658  return false;
2659  }
2660 
2661  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2662  _M_is_constprop_all_of() const
2663  {
2664  if (__builtin_constant_p(_M_data))
2665  {
2666  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2667  constexpr _BuiltinType __active_mask
2668  = ~_BuiltinType() >> (__nbits - _Width);
2669  return (_M_data & __active_mask) == __active_mask;
2670  }
2671  return false;
2672  }
2673 
2674  _BuiltinType _M_data;
2675  };
2676 
2677 // _SimdWrapperBase{{{1
2678 template <bool _MustZeroInitPadding, typename _BuiltinType>
2679  struct _SimdWrapperBase;
2680 
2681 template <typename _BuiltinType>
2682  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2683  {
2684  _GLIBCXX_SIMD_INTRINSIC constexpr
2685  _SimdWrapperBase() = default;
2686 
2687  _GLIBCXX_SIMD_INTRINSIC constexpr
2688  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2689 
2690  _BuiltinType _M_data;
2691  };
2692 
2693 template <typename _BuiltinType>
2694  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2695  // never become SNaN
2696  {
2697  _GLIBCXX_SIMD_INTRINSIC constexpr
2698  _SimdWrapperBase() : _M_data() {}
2699 
2700  _GLIBCXX_SIMD_INTRINSIC constexpr
2701  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2702 
2703  _BuiltinType _M_data;
2704  };
2705 
2706 // }}}
2707 // _SimdWrapper{{{
2708 struct _DisabledSimdWrapper;
2709 
2710 template <typename _Tp, size_t _Width>
2711  struct _SimdWrapper<
2712  _Tp, _Width,
2713  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2714  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2715  && sizeof(_Tp) * _Width
2716  == sizeof(__vector_type_t<_Tp, _Width>),
2717  __vector_type_t<_Tp, _Width>>
2718  {
2719  static constexpr bool _S_need_default_init
2720  = __has_iec559_behavior<__signaling_NaN, _Tp>::value
2721  and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
2722 
2723  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2724 
2725  using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
2726 
2727  static_assert(__is_vectorizable_v<_Tp>);
2728  static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2729 
2730  using value_type = _Tp;
2731 
2732  static inline constexpr size_t _S_full_size
2733  = sizeof(_BuiltinType) / sizeof(value_type);
2734  static inline constexpr int _S_size = _Width;
2735  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2736 
2737  using _Base::_M_data;
2738 
2739  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2740  __as_full_vector() const
2741  { return _M_data; }
2742 
2743  _GLIBCXX_SIMD_INTRINSIC constexpr
2744  _SimdWrapper(initializer_list<_Tp> __init)
2745  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2746  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2747  return __init.begin()[__i.value];
2748  })) {}
2749 
2750  _GLIBCXX_SIMD_INTRINSIC constexpr
2751  _SimdWrapper() = default;
2752 
2753  _GLIBCXX_SIMD_INTRINSIC constexpr
2754  _SimdWrapper(const _SimdWrapper&) = default;
2755 
2756  _GLIBCXX_SIMD_INTRINSIC constexpr
2757  _SimdWrapper(_SimdWrapper&&) = default;
2758 
2759  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2760  operator=(const _SimdWrapper&) = default;
2761 
2762  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2763  operator=(_SimdWrapper&&) = default;
2764 
2765  // Convert from exactly matching __vector_type_t
2766  using _SimdWrapperBase<_S_need_default_init, _BuiltinType>::_SimdWrapperBase;
2767 
2768  // Convert from __intrinsic_type_t if __intrinsic_type_t and __vector_type_t differ, otherwise
2769  // this ctor should not exist. Making the argument type unusable is our next best solution.
2770  _GLIBCXX_SIMD_INTRINSIC constexpr
2771  _SimdWrapper(conditional_t<is_same_v<_BuiltinType, __intrinsic_type_t<_Tp, _Width>>,
2772  _DisabledSimdWrapper, __intrinsic_type_t<_Tp, _Width>> __x)
2773  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2774 
2775  // Convert from different __vector_type_t, but only if bit reinterpretation is a correct
2776  // conversion of the value_type
2777  template <typename _V, typename _TVT = _VectorTraits<_V>,
2778  typename = enable_if_t<sizeof(typename _TVT::value_type) == sizeof(_Tp)
2779  and sizeof(_V) == sizeof(_BuiltinType)
2780  and is_integral_v<_Tp>
2781  and is_integral_v<typename _TVT::value_type>>>
2782  _GLIBCXX_SIMD_INTRINSIC constexpr
2783  _SimdWrapper(_V __x)
2784  : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
2785 
2786  template <typename... _As,
2787  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2788  && sizeof...(_As) <= _Width)>>
2789  _GLIBCXX_SIMD_INTRINSIC constexpr
2790  operator _SimdTuple<_Tp, _As...>() const
2791  {
2792  return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2793  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2794  { return _M_data[int(__i)]; });
2795  }
2796 
2797  _GLIBCXX_SIMD_INTRINSIC constexpr
2798  operator const _BuiltinType&() const
2799  { return _M_data; }
2800 
2801  _GLIBCXX_SIMD_INTRINSIC constexpr
2802  operator _BuiltinType&()
2803  { return _M_data; }
2804 
2805  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2806  operator[](size_t __i) const
2807  { return _M_data[__i]; }
2808 
2809  template <size_t __i>
2810  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2811  operator[](_SizeConstant<__i>) const
2812  { return _M_data[__i]; }
2813 
2814  _GLIBCXX_SIMD_INTRINSIC constexpr void
2815  _M_set(size_t __i, _Tp __x)
2816  {
2817  if (__builtin_is_constant_evaluated())
2818  _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2819  return __j == __i ? __x : _M_data[__j()];
2820  });
2821  else
2822  _M_data[__i] = __x;
2823  }
2824 
2825  _GLIBCXX_SIMD_INTRINSIC
2826  constexpr bool
2827  _M_is_constprop() const
2828  { return __builtin_constant_p(_M_data); }
2829 
2830  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2831  _M_is_constprop_none_of() const
2832  {
2833  if (__builtin_constant_p(_M_data))
2834  {
2835  bool __r = true;
2836  if constexpr (is_floating_point_v<_Tp>)
2837  {
2838  using _Ip = __int_for_sizeof_t<_Tp>;
2839  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2840  __execute_n_times<_Width>(
2841  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2842  }
2843  else
2844  __execute_n_times<_Width>(
2845  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2846  if (__builtin_constant_p(__r))
2847  return __r;
2848  }
2849  return false;
2850  }
2851 
2852  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2853  _M_is_constprop_all_of() const
2854  {
2855  if (__builtin_constant_p(_M_data))
2856  {
2857  bool __r = true;
2858  if constexpr (is_floating_point_v<_Tp>)
2859  {
2860  using _Ip = __int_for_sizeof_t<_Tp>;
2861  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2862  __execute_n_times<_Width>(
2863  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2864  }
2865  else
2866  __execute_n_times<_Width>(
2867  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2868  if (__builtin_constant_p(__r))
2869  return __r;
2870  }
2871  return false;
2872  }
2873  };
2874 
2875 // }}}
2876 
2877 // __vectorized_sizeof {{{
2878 template <typename _Tp>
2879  constexpr size_t
2880  __vectorized_sizeof()
2881  {
2882  if constexpr (!__is_vectorizable_v<_Tp>)
2883  return 0;
2884 
2885  if constexpr (sizeof(_Tp) <= 8)
2886  {
2887  // X86:
2888  if constexpr (__have_avx512bw)
2889  return 64;
2890  if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2891  return 64;
2892  if constexpr (__have_avx2)
2893  return 32;
2894  if constexpr (__have_avx && is_floating_point_v<_Tp>)
2895  return 32;
2896  if constexpr (__have_sse2)
2897  return 16;
2898  if constexpr (__have_sse && is_same_v<_Tp, float>)
2899  return 16;
2900  /* The following is too much trouble because of mixed MMX and x87 code.
2901  * While nothing here explicitly calls MMX instructions of registers,
2902  * they are still emitted but no EMMS cleanup is done.
2903  if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2904  return 8;
2905  */
2906 
2907  // PowerPC:
2908  if constexpr (__have_power8vec
2909  || (__have_power_vmx && (sizeof(_Tp) < 8))
2910  || (__have_power_vsx && is_floating_point_v<_Tp>) )
2911  return 16;
2912 
2913  // ARM:
2914  if constexpr (__have_neon_a64
2915  || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2916  return 16;
2917  if constexpr (__have_neon
2918  && sizeof(_Tp) < 8
2919  // Only allow fp if the user allows non-ICE559 fp (e.g.
2920  // via -ffast-math). ARMv7 NEON fp is not conforming to
2921  // IEC559.
2922  && (__support_neon_float || !is_floating_point_v<_Tp>))
2923  return 16;
2924  }
2925 
2926  return sizeof(_Tp);
2927  }
2928 
2929 // }}}
2930 namespace simd_abi {
2931 // most of simd_abi is defined in simd_detail.h
2932 template <typename _Tp>
2933  inline constexpr int max_fixed_size
2934  = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2935 
2936 // compatible {{{
2937 #if defined __x86_64__ || defined __aarch64__
2938 template <typename _Tp>
2939  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2940 #elif defined __ARM_NEON
2941 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2942 // ABI?)
2943 template <typename _Tp>
2944  using compatible
2945  = conditional_t<(sizeof(_Tp) < 8
2946  && (__support_neon_float || !is_floating_point_v<_Tp>)),
2947  _VecBuiltin<16>, scalar>;
2948 #else
2949 template <typename>
2950  using compatible = scalar;
2951 #endif
2952 
2953 // }}}
2954 // native {{{
2955 template <typename _Tp>
2956  constexpr auto
2957  __determine_native_abi()
2958  {
2959  constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2960  if constexpr (__bytes == sizeof(_Tp))
2961  return static_cast<scalar*>(nullptr);
2962  else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2963  return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2964  else
2965  return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2966  }
2967 
2968 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2969  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2970 
2971 // }}}
2972 // __default_abi {{{
2973 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2974 template <typename _Tp>
2975  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2976 #else
2977 template <typename _Tp>
2978  using __default_abi = compatible<_Tp>;
2979 #endif
2980 
2981 // }}}
2982 } // namespace simd_abi
2983 
2984 // traits {{{1
2985 template <typename _Tp>
2986  struct is_simd_flag_type
2987  : false_type
2988  {};
2989 
2990 template <>
2991  struct is_simd_flag_type<element_aligned_tag>
2992  : true_type
2993  {};
2994 
2995 template <>
2996  struct is_simd_flag_type<vector_aligned_tag>
2997  : true_type
2998  {};
2999 
3000 template <size_t _Np>
3001  struct is_simd_flag_type<overaligned_tag<_Np>>
3002  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
3003  {};
3004 
3005 template <typename _Tp>
3006  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
3007 
3008 template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
3009  using _IsSimdFlagType = _Tp;
3010 
3011 // is_abi_tag {{{2
/// Trait: is _Tp an ABI tag? Types without a nested `_IsValidAbiTag` are not.
template <typename _Tp, typename = void>
  struct is_abi_tag : false_type {};

/// Types that declare a nested `_IsValidAbiTag` inherit its value.
template <typename _Tp>
  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
  : _Tp::_IsValidAbiTag {};

/// Variable-template shorthand for is_abi_tag<_Tp>::value.
template <typename _Tp>
  inline constexpr bool is_abi_tag_v
    = is_abi_tag<_Tp>::value;
3021 
3022 // is_simd(_mask) {{{2
/// Trait: is _Tp a specialization of simd? The primary template says no.
template <typename _Tp>
  struct is_simd : false_type {};

/// Variable-template shorthand for is_simd<_Tp>::value.
template <typename _Tp>
  inline constexpr bool is_simd_v
    = is_simd<_Tp>::value;
3028 
/// Trait: is _Tp a specialization of simd_mask? The primary template says no.
template <typename _Tp>
  struct is_simd_mask : false_type {};

/// Variable-template shorthand for is_simd_mask<_Tp>::value.
template <typename _Tp>
  inline constexpr bool is_simd_mask_v
    = is_simd_mask<_Tp>::value;
3034 
3035 // simd_size {{{2
3036 template <typename _Tp, typename _Abi, typename = void>
3037  struct __simd_size_impl {};
3038 
3039 template <typename _Tp, typename _Abi>
3040  struct __simd_size_impl<
3041  _Tp, _Abi,
3042  enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
3043  : _SizeConstant<_Abi::template _S_size<_Tp>> {};
3044 
3045 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3046  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
3047 
3048 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3049  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
3050 
3051 // simd_abi::deduce {{{2
/// Forward declaration only; simd_abi::deduce derives from this template.
/// The third parameter is a defaulted SFINAE slot.
template <typename _Tp, size_t _Np, typename = void>
  struct
  __deduce_impl;
3054 
3055 namespace simd_abi {
/**
 * @tparam _Tp   The requested `value_type` for the elements.
 * @tparam _Np   The requested number of elements.
 * @tparam _Abis This parameter is ignored, since this implementation cannot
 * make any use of it. Either a good native ABI is matched and used as `type`
 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
 * the best matching native ABIs.
 */
3064 template <typename _Tp, size_t _Np, typename...>
3065  struct deduce : __deduce_impl<_Tp, _Np> {};
3066 
3067 template <typename _Tp, size_t _Np, typename... _Abis>
3068  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
3069 } // namespace simd_abi
3070 
3071 // }}}2
3072 // rebind_simd {{{2
3073 template <typename _Tp, typename _V, typename = void>
3074  struct rebind_simd;
3075 
3076 template <typename _Tp, typename _Up, typename _Abi>
3077  struct rebind_simd<_Tp, simd<_Up, _Abi>,
3078  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
3079  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
3080 
3081 template <typename _Tp, typename _Up, typename _Abi>
3082  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
3083  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
3084  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
3085 
3086 template <typename _Tp, typename _V>
3087  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
3088 
3089 // resize_simd {{{2
3090 template <int _Np, typename _V, typename = void>
3091  struct resize_simd;
3092 
3093 template <int _Np, typename _Tp, typename _Abi>
3094  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3095  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3096 
3097 template <int _Np, typename _Tp, typename _Abi>
3098  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3099  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3100 
3101 template <int _Np, typename _V>
3102  using resize_simd_t = typename resize_simd<_Np, _V>::type;
3103 
3104 // }}}2
3105 // memory_alignment {{{2
3106 template <typename _Tp, typename _Up = typename _Tp::value_type>
3107  struct memory_alignment
3108  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3109 
3110 template <typename _Tp, typename _Up = typename _Tp::value_type>
3111  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3112 
3113 // class template simd [simd] {{{1
3114 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3115  class simd;
3116 
3117 template <typename _Tp, typename _Abi>
3118  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3119 
3120 template <typename _Tp>
3121  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3122 
3123 template <typename _Tp, int _Np>
3124  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3125 
3126 template <typename _Tp, size_t _Np>
3127  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3128 
3129 // class template simd_mask [simd_mask] {{{1
3130 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3131  class simd_mask;
3132 
3133 template <typename _Tp, typename _Abi>
3134  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3135 
3136 template <typename _Tp>
3137  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3138 
3139 template <typename _Tp, int _Np>
3140  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3141 
3142 template <typename _Tp, size_t _Np>
3143  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3144 
3145 // casts [simd.casts] {{{1
3146 // static_simd_cast {{{2
3147 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3148  struct __static_simd_cast_return_type;
3149 
3150 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3151  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3152  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3153 
3154 template <typename _Tp, typename _Up, typename _Ap>
3155  struct __static_simd_cast_return_type<
3156  _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3157  { using type = _Tp; };
3158 
3159 template <typename _Tp, typename _Ap>
3160  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3161 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3162  enable_if_t<__is_vectorizable_v<_Tp>>
3163 #else
3164  void
3165 #endif
3166  >
3167  { using type = simd<_Tp, _Ap>; };
3168 
/// make_signed that is also usable with non-integral types: those are passed
/// through unchanged.
template <typename _Tp, typename = void>
  struct __safe_make_signed
  {
    using type = _Tp;
  };

/// Integral types map to their signed counterpart.
template <typename _Tp>
  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
  {
    // the detour through make_unsigned is because of PR85951
    using type =
      typename make_signed<typename make_unsigned<_Tp>::type>::type;
  };

/// Shorthand for __safe_make_signed<_Tp>::type.
template <typename _Tp>
  using safe_make_signed_t
    = typename __safe_make_signed<_Tp>::type;
3181 
3182 template <typename _Tp, typename _Up, typename _Ap>
3183  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3184 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3185  enable_if_t<__is_vectorizable_v<_Tp>>
3186 #else
3187  void
3188 #endif
3189  >
3190  {
3191  using type = conditional_t<
3192  (is_integral_v<_Up> && is_integral_v<_Tp> &&
3193 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3194  is_signed_v<_Up> != is_signed_v<_Tp> &&
3195 #endif
3196  is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3197  simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3198  };
3199 
3200 template <typename _Tp, typename _Up, typename _Ap,
3201  typename _R
3202  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3203  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3204  static_simd_cast(const simd<_Up, _Ap>& __x)
3205  {
3206  if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3207  return __x;
3208  else
3209  {
3210  _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3211  __c;
3212  return _R(__private_init, __c(__data(__x)));
3213  }
3214  }
3215 
3216 namespace __proposed {
3217 template <typename _Tp, typename _Up, typename _Ap,
3218  typename _R
3219  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3220  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3221  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3222  {
3223  using _RM = typename _R::mask_type;
3224  return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3225  typename _RM::simd_type::value_type>(__x)};
3226  }
3227 
3228 template <typename _To, typename _Up, typename _Abi>
3229  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3230  _To
3231  simd_bit_cast(const simd<_Up, _Abi>& __x)
3232  {
3233  using _Tp = typename _To::value_type;
3234  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3235  using _From = simd<_Up, _Abi>;
3236  using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3237  // with concepts, the following should be constraints
3238  static_assert(sizeof(_To) == sizeof(_From));
3239  static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3240  static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3241 #if __has_builtin(__builtin_bit_cast)
3242  return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3243 #else
3244  return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3245 #endif
3246  }
3247 
3248 template <typename _To, typename _Up, typename _Abi>
3249  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3250  _To
3251  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3252  {
3253  using _From = simd_mask<_Up, _Abi>;
3254  static_assert(sizeof(_To) == sizeof(_From));
3255  static_assert(is_trivially_copyable_v<_From>);
3256  // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially
3257  // copyable.
3258  if constexpr (is_simd_v<_To>)
3259  {
3260  using _Tp = typename _To::value_type;
3261  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3262  static_assert(is_trivially_copyable_v<_ToMember>);
3263 #if __has_builtin(__builtin_bit_cast)
3264  return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3265 #else
3266  return {__private_init, __bit_cast<_ToMember>(__x)};
3267 #endif
3268  }
3269  else
3270  {
3271  static_assert(is_trivially_copyable_v<_To>);
3272 #if __has_builtin(__builtin_bit_cast)
3273  return __builtin_bit_cast(_To, __x);
3274 #else
3275  return __bit_cast<_To>(__x);
3276 #endif
3277  }
3278  }
3279 } // namespace __proposed
3280 
3281 // simd_cast {{{2
3282 template <typename _Tp, typename _Up, typename _Ap,
3283  typename _To = __value_type_or_identity_t<_Tp>>
3284  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3285  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3286  -> decltype(static_simd_cast<_Tp>(__x))
3287  { return static_simd_cast<_Tp>(__x); }
3288 
3289 namespace __proposed {
3290 template <typename _Tp, typename _Up, typename _Ap,
3291  typename _To = __value_type_or_identity_t<_Tp>>
3292  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3293  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3294  -> decltype(static_simd_cast<_Tp>(__x))
3295  { return static_simd_cast<_Tp>(__x); }
3296 } // namespace __proposed
3297 
3298 // }}}2
3299 // resizing_simd_cast {{{
3300 namespace __proposed {
3301 /* Proposed spec:
3302 
3303 template <class T, class U, class Abi>
3304 T resizing_simd_cast(const simd<U, Abi>& x)
3305 
3306 p1 Constraints:
3307  - is_simd_v<T> is true and
3308  - T::value_type is the same type as U
3309 
3310 p2 Returns:
3311  A simd object with the i^th element initialized to x[i] for all i in the
3312  range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3313  than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3314 
3315 template <class T, class U, class Abi>
3316 T resizing_simd_cast(const simd_mask<U, Abi>& x)
3317 
3318 p1 Constraints: is_simd_mask_v<T> is true
3319 
3320 p2 Returns:
3321  A simd_mask object with the i^th element initialized to x[i] for all i in
3322 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3323  than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3324 
3325  */
3326 
3327 template <typename _Tp, typename _Up, typename _Ap>
3328  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3329  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3330  resizing_simd_cast(const simd<_Up, _Ap>& __x)
3331  {
3332  if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3333  return __x;
3334  else if (__builtin_is_constant_evaluated())
3335  return _Tp([&](auto __i) constexpr {
3336  return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3337  });
3338  else if constexpr (simd_size_v<_Up, _Ap> == 1)
3339  {
3340  _Tp __r{};
3341  __r[0] = __x[0];
3342  return __r;
3343  }
3344  else if constexpr (_Tp::size() == 1)
3345  return __x[0];
3346  else if constexpr (sizeof(_Tp) == sizeof(__x)
3347  && !__is_fixed_size_abi_v<_Ap>)
3348  return {__private_init,
3349  __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3350  _Ap::_S_masked(__data(__x))._M_data)};
3351  else
3352  {
3353  _Tp __r{};
3354  __builtin_memcpy(&__data(__r), &__data(__x),
3355  sizeof(_Up)
3356  * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3357  return __r;
3358  }
3359  }
3360 
3361 template <typename _Tp, typename _Up, typename _Ap>
3362  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3363  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3364  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3365  {
3366  return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3367  typename _Tp::simd_type::value_type>(__x)};
3368  }
3369 } // namespace __proposed
3370 
3371 // }}}
3372 // to_fixed_size {{{2
3373 template <typename _Tp, int _Np>
3374  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3375  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3376  { return __x; }
3377 
3378 template <typename _Tp, int _Np>
3379  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3380  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3381  { return __x; }
3382 
3383 template <typename _Tp, typename _Ap>
3384  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3385  to_fixed_size(const simd<_Tp, _Ap>& __x)
3386  {
3387  using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3388  return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3389  }
3390 
3391 template <typename _Tp, typename _Ap>
3392  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3393  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3394  {
3395  return {__private_init,
3396  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3397  }
3398 
3399 // to_native {{{2
3400 template <typename _Tp, int _Np>
3401  _GLIBCXX_SIMD_INTRINSIC
3402  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3403  to_native(const fixed_size_simd<_Tp, _Np>& __x)
3404  {
3405  alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3406  __x.copy_to(__mem, vector_aligned);
3407  return {__mem, vector_aligned};
3408  }
3409 
3410 template <typename _Tp, int _Np>
3411  _GLIBCXX_SIMD_INTRINSIC
3412  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3413  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3414  {
3415  return native_simd_mask<_Tp>(
3416  __private_init,
3417  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3418  }
3419 
3420 // to_compatible {{{2
3421 template <typename _Tp, int _Np>
3422  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3423  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3424  {
3425  alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3426  __x.copy_to(__mem, vector_aligned);
3427  return {__mem, vector_aligned};
3428  }
3429 
3430 template <typename _Tp, int _Np>
3431  _GLIBCXX_SIMD_INTRINSIC
3432  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3433  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3434  {
3435  return simd_mask<_Tp>(
3436  __private_init,
3437  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3438  }
3439 
3440 // masked assignment [simd_mask.where] {{{1
3441 
3442 // where_expression {{{1
3443 // const_where_expression<M, T> {{{2
3444 template <typename _M, typename _Tp>
3445  class const_where_expression
3446  {
3447  using _V = _Tp;
3448  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3449 
3450  struct _Wrapper { using value_type = _V; };
3451 
3452  protected:
3453  using _Impl = typename _V::_Impl;
3454 
3455  using value_type =
3456  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3457 
3458  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3459  __get_mask(const const_where_expression& __x)
3460  { return __x._M_k; }
3461 
3462  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3463  __get_lvalue(const const_where_expression& __x)
3464  { return __x._M_value; }
3465 
3466  const _M& _M_k;
3467  _Tp& _M_value;
3468 
3469  public:
3470  const_where_expression(const const_where_expression&) = delete;
3471 
3472  const_where_expression& operator=(const const_where_expression&) = delete;
3473 
3474  _GLIBCXX_SIMD_INTRINSIC constexpr
3475  const_where_expression(const _M& __kk, const _Tp& dd)
3476  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3477 
3478  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3479  operator-() const&&
3480  {
3481  return {__private_init,
3482  _Impl::template _S_masked_unary<negate>(__data(_M_k),
3483  __data(_M_value))};
3484  }
3485 
3486  template <typename _Up, typename _Flags>
3487  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3488  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3489  {
3490  return {__private_init,
3491  _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3492  _Flags::template _S_apply<_V>(__mem))};
3493  }
3494 
3495  template <typename _Up, typename _Flags>
3496  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3497  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3498  {
3499  _Impl::_S_masked_store(__data(_M_value),
3500  _Flags::template _S_apply<_V>(__mem),
3501  __data(_M_k));
3502  }
3503  };
3504 
3505 // const_where_expression<bool, T> {{{2
3506 template <typename _Tp>
3507  class const_where_expression<bool, _Tp>
3508  {
3509  using _M = bool;
3510  using _V = _Tp;
3511 
3512  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3513 
3514  struct _Wrapper { using value_type = _V; };
3515 
3516  protected:
3517  using value_type
3518  = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3519 
3520  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3521  __get_mask(const const_where_expression& __x)
3522  { return __x._M_k; }
3523 
3524  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3525  __get_lvalue(const const_where_expression& __x)
3526  { return __x._M_value; }
3527 
3528  const bool _M_k;
3529  _Tp& _M_value;
3530 
3531  public:
3532  const_where_expression(const const_where_expression&) = delete;
3533  const_where_expression& operator=(const const_where_expression&) = delete;
3534 
3535  _GLIBCXX_SIMD_INTRINSIC constexpr
3536  const_where_expression(const bool __kk, const _Tp& dd)
3537  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3538 
3539  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3540  operator-() const&&
3541  { return _M_k ? -_M_value : _M_value; }
3542 
3543  template <typename _Up, typename _Flags>
3544  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3545  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3546  { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3547 
3548  template <typename _Up, typename _Flags>
3549  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3550  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3551  {
3552  if (_M_k)
3553  __mem[0] = _M_value;
3554  }
3555  };
3556 
3557 // where_expression<M, T> {{{2
3558 template <typename _M, typename _Tp>
3559  class where_expression : public const_where_expression<_M, _Tp>
3560  {
3561  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3562 
3563  static_assert(!is_const<_Tp>::value,
3564  "where_expression may only be instantiated with __a non-const "
3565  "_Tp parameter");
3566 
3567  using typename const_where_expression<_M, _Tp>::value_type;
3568  using const_where_expression<_M, _Tp>::_M_k;
3569  using const_where_expression<_M, _Tp>::_M_value;
3570 
3571  static_assert(
3572  is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3573  static_assert(_M::size() == _Tp::size(), "");
3574 
3575  _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3576  __get_lvalue(where_expression& __x)
3577  { return __x._M_value; }
3578 
3579  public:
3580  where_expression(const where_expression&) = delete;
3581  where_expression& operator=(const where_expression&) = delete;
3582 
3583  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3584  where_expression(const _M& __kk, _Tp& dd)
3585  : const_where_expression<_M, _Tp>(__kk, dd) {}
3586 
3587  template <typename _Up>
3588  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3589  operator=(_Up&& __x) &&
3590  {
3591  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3592  __to_value_type_or_member_type<_Tp>(
3593  static_cast<_Up&&>(__x)));
3594  }
3595 
3596 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3597  template <typename _Up> \
3598  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3599  operator __op##=(_Up&& __x)&& \
3600  { \
3601  _Impl::template _S_masked_cassign( \
3602  __data(_M_k), __data(_M_value), \
3603  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3604  [](auto __impl, auto __lhs, auto __rhs) \
3605  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3606  { return __impl.__name(__lhs, __rhs); }); \
3607  } \
3608  static_assert(true)
3609  _GLIBCXX_SIMD_OP_(+, _S_plus);
3610  _GLIBCXX_SIMD_OP_(-, _S_minus);
3611  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3612  _GLIBCXX_SIMD_OP_(/, _S_divides);
3613  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3614  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3615  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3616  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3617  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3618  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3619 #undef _GLIBCXX_SIMD_OP_
3620 
3621  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3622  operator++() &&
3623  {
3624  __data(_M_value)
3625  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3626  }
3627 
3628  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3629  operator++(int) &&
3630  {
3631  __data(_M_value)
3632  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3633  }
3634 
3635  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3636  operator--() &&
3637  {
3638  __data(_M_value)
3639  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3640  }
3641 
3642  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3643  operator--(int) &&
3644  {
3645  __data(_M_value)
3646  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3647  }
3648 
3649  // intentionally hides const_where_expression::copy_from
3650  template <typename _Up, typename _Flags>
3651  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3652  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3653  {
3654  __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3655  _Flags::template _S_apply<_Tp>(__mem));
3656  }
3657  };
3658 
3659 // where_expression<bool, T> {{{2
3660 template <typename _Tp>
3661  class where_expression<bool, _Tp>
3662  : public const_where_expression<bool, _Tp>
3663  {
3664  using _M = bool;
3665  using typename const_where_expression<_M, _Tp>::value_type;
3666  using const_where_expression<_M, _Tp>::_M_k;
3667  using const_where_expression<_M, _Tp>::_M_value;
3668 
3669  public:
3670  where_expression(const where_expression&) = delete;
3671  where_expression& operator=(const where_expression&) = delete;
3672 
3673  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3674  where_expression(const _M& __kk, _Tp& dd)
3675  : const_where_expression<_M, _Tp>(__kk, dd) {}
3676 
3677 #define _GLIBCXX_SIMD_OP_(__op) \
3678  template <typename _Up> \
3679  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3680  operator __op(_Up&& __x)&& \
3681  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3682 
3683  _GLIBCXX_SIMD_OP_(=)
3684  _GLIBCXX_SIMD_OP_(+=)
3685  _GLIBCXX_SIMD_OP_(-=)
3686  _GLIBCXX_SIMD_OP_(*=)
3687  _GLIBCXX_SIMD_OP_(/=)
3688  _GLIBCXX_SIMD_OP_(%=)
3689  _GLIBCXX_SIMD_OP_(&=)
3690  _GLIBCXX_SIMD_OP_(|=)
3691  _GLIBCXX_SIMD_OP_(^=)
3692  _GLIBCXX_SIMD_OP_(<<=)
3693  _GLIBCXX_SIMD_OP_(>>=)
3694  #undef _GLIBCXX_SIMD_OP_
3695 
3696  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3697  operator++() &&
3698  { if (_M_k) ++_M_value; }
3699 
3700  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3701  operator++(int) &&
3702  { if (_M_k) ++_M_value; }
3703 
3704  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3705  operator--() &&
3706  { if (_M_k) --_M_value; }
3707 
3708  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3709  operator--(int) &&
3710  { if (_M_k) --_M_value; }
3711 
3712  // intentionally hides const_where_expression::copy_from
3713  template <typename _Up, typename _Flags>
3714  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3715  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3716  { if (_M_k) _M_value = __mem[0]; }
3717  };
3718 
3719 // where {{{1
3720 template <typename _Tp, typename _Ap>
3721  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3722  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3723  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3724  { return {__k, __value}; }
3725 
3726 template <typename _Tp, typename _Ap>
3727  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3728  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3729  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3730  { return {__k, __value}; }
3731 
3732 template <typename _Tp, typename _Ap>
3733  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3734  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3735  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3736  { return {__k, __value}; }
3737 
3738 template <typename _Tp, typename _Ap>
3739  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3740  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3741  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3742  { return {__k, __value}; }
3743 
3744 template <typename _Tp>
3745  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3746  where(_ExactBool __k, _Tp& __value)
3747  { return {__k, __value}; }
3748 
3749 template <typename _Tp>
3750  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3751  where(_ExactBool __k, const _Tp& __value)
3752  { return {__k, __value}; }
3753 
3754 template <typename _Tp, typename _Ap>
3755  _GLIBCXX_SIMD_CONSTEXPR void
3756  where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3757 
3758 template <typename _Tp, typename _Ap>
3759  _GLIBCXX_SIMD_CONSTEXPR void
3760  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3761 
3762 // proposed mask iterations {{{1
3763 namespace __proposed {
3764 template <size_t _Np>
3765  class where_range
3766  {
3767  const bitset<_Np> __bits;
3768 
3769  public:
3770  where_range(bitset<_Np> __b) : __bits(__b) {}
3771 
3772  class iterator
3773  {
3774  size_t __mask;
3775  size_t __bit;
3776 
3777  _GLIBCXX_SIMD_INTRINSIC void
3778  __next_bit()
3779  { __bit = __builtin_ctzl(__mask); }
3780 
3781  _GLIBCXX_SIMD_INTRINSIC void
3782  __reset_lsb()
3783  {
3784  // 01100100 - 1 = 01100011
3785  __mask &= (__mask - 1);
3786  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3787  }
3788 
3789  public:
3790  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3791  iterator(const iterator&) = default;
3792  iterator(iterator&&) = default;
3793 
3794  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3795  operator->() const
3796  { return __bit; }
3797 
3798  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3799  operator*() const
3800  { return __bit; }
3801 
3802  _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3803  operator++()
3804  {
3805  __reset_lsb();
3806  __next_bit();
3807  return *this;
3808  }
3809 
3810  _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3811  operator++(int)
3812  {
3813  iterator __tmp = *this;
3814  __reset_lsb();
3815  __next_bit();
3816  return __tmp;
3817  }
3818 
3819  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3820  operator==(const iterator& __rhs) const
3821  { return __mask == __rhs.__mask; }
3822 
3823  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3824  operator!=(const iterator& __rhs) const
3825  { return __mask != __rhs.__mask; }
3826  };
3827 
3828  iterator
3829  begin() const
3830  { return __bits.to_ullong(); }
3831 
3832  iterator
3833  end() const
3834  { return 0; }
3835  };
3836 
3837 template <typename _Tp, typename _Ap>
3838  where_range<simd_size_v<_Tp, _Ap>>
3839  where(const simd_mask<_Tp, _Ap>& __k)
3840  { return __k.__to_bitset(); }
3841 
3842 } // namespace __proposed
3843 
3844 // }}}1
3845 // reductions [simd.reductions] {{{1
3846 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3847  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3848  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3849  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3850 
3851 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3852  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3853  reduce(const const_where_expression<_M, _V>& __x,
3854  typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3855  {
3856  if (__builtin_expect(none_of(__get_mask(__x)), false))
3857  return __identity_element;
3858 
3859  _V __tmp = __identity_element;
3860  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3861  __data(__get_lvalue(__x)));
3862  return reduce(__tmp, __binary_op);
3863  }
3864 
3865 template <typename _M, typename _V>
3866  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3867  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3868  { return reduce(__x, 0, __binary_op); }
3869 
3870 template <typename _M, typename _V>
3871  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3872  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3873  { return reduce(__x, 1, __binary_op); }
3874 
3875 template <typename _M, typename _V>
3876  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3877  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3878  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3879 
3880 template <typename _M, typename _V>
3881  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3882  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3883  { return reduce(__x, 0, __binary_op); }
3884 
3885 template <typename _M, typename _V>
3886  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3887  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3888  { return reduce(__x, 0, __binary_op); }
3889 
3890 template <typename _Tp, typename _Abi>
3891  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3892  hmin(const simd<_Tp, _Abi>& __v) noexcept
3893  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3894 
3895 template <typename _Tp, typename _Abi>
3896  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3897  hmax(const simd<_Tp, _Abi>& __v) noexcept
3898  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3899 
3900 template <typename _M, typename _V>
3901  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3902  typename _V::value_type
3903  hmin(const const_where_expression<_M, _V>& __x) noexcept
3904  {
3905  using _Tp = typename _V::value_type;
3906  constexpr _Tp __id_elem =
3907 #ifdef __FINITE_MATH_ONLY__
3908  __finite_max_v<_Tp>;
3909 #else
3910  __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3911 #endif
3912  _V __tmp = __id_elem;
3913  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3914  __data(__get_lvalue(__x)));
3915  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3916  }
3917 
3918 template <typename _M, typename _V>
3919  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3920  typename _V::value_type
3921  hmax(const const_where_expression<_M, _V>& __x) noexcept
3922  {
3923  using _Tp = typename _V::value_type;
3924  constexpr _Tp __id_elem =
3925 #ifdef __FINITE_MATH_ONLY__
3926  __finite_min_v<_Tp>;
3927 #else
3928  [] {
3929  if constexpr (__value_exists_v<__infinity, _Tp>)
3930  return -__infinity_v<_Tp>;
3931  else
3932  return __finite_min_v<_Tp>;
3933  }();
3934 #endif
3935  _V __tmp = __id_elem;
3936  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3937  __data(__get_lvalue(__x)));
3938  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3939  }
3940 
3941 // }}}1
3942 // algorithms [simd.alg] {{{
3943 template <typename _Tp, typename _Ap>
3944  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3945  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3946  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3947 
3948 template <typename _Tp, typename _Ap>
3949  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3950  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3951  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3952 
3953 template <typename _Tp, typename _Ap>
3954  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3955  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3956  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3957  {
3958  const auto pair_of_members
3959  = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3960  return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3961  simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3962  }
3963 
3964 template <typename _Tp, typename _Ap>
3965  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3966  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3967  {
3968  using _Impl = typename _Ap::_SimdImpl;
3969  return {__private_init,
3970  _Impl::_S_min(__data(__hi),
3971  _Impl::_S_max(__data(__lo), __data(__v)))};
3972  }
3973 
3974 // }}}
3975 
// Forward declaration of split<_Sizes...>(simd): __split_wrapper calls it
// before its definition appears further down. The overload only participates
// in overload resolution if the requested sizes add up to the size of the
// argument. The result is a tuple with one simd of deduced ABI per size.
template <size_t... _Sizes, typename _Tp, typename _Ap,
	  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>&);
3980 
3981 // __extract_part {{{
// Declaration only; the definition is not part of this section of the file.
// The return type is the plain element type _Tp if a single element is
// requested (_Np == _Total and _Combine == 1), otherwise a _SimdWrapper with
// _Np / _Total * _Combine elements.
// NOTE(review): presumably returns _Combine consecutive parts, starting at
// part _Index, of __x divided into _Total equal parts - confirm against the
// definition.
template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
  conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3986 
// Declaration only; the definition is not part of this section of the file.
// Counterpart of __extract_part for _SimdTuple arguments; the return type is
// deduced from the definition.
template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3990 
3991 // }}}
3992 // _SizeList {{{
3993 template <size_t _V0, size_t... _Values>
3994  struct _SizeList
3995  {
3996  template <size_t _I>
3997  static constexpr size_t
3998  _S_at(_SizeConstant<_I> = {})
3999  {
4000  if constexpr (_I == 0)
4001  return _V0;
4002  else
4003  return _SizeList<_Values...>::template _S_at<_I - 1>();
4004  }
4005 
4006  template <size_t _I>
4007  static constexpr auto
4008  _S_before(_SizeConstant<_I> = {})
4009  {
4010  if constexpr (_I == 0)
4011  return _SizeConstant<0>();
4012  else
4013  return _SizeConstant<
4014  _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
4015  }
4016 
4017  template <size_t _Np>
4018  static constexpr auto
4019  _S_pop_front(_SizeConstant<_Np> = {})
4020  {
4021  if constexpr (_Np == 0)
4022  return _SizeList();
4023  else
4024  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
4025  }
4026  };
4027 
4028 // }}}
4029 // __extract_center {{{
// Returns the middle half of __x: with __x viewed as four quarters
// x0 x1 x2 x3, the result holds {x1, x2}. Requires _Np to be a multiple of 4.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
  __extract_center(_SimdWrapper<_Tp, _Np> __x)
  {
    static_assert(_Np >= 4);
    static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
      {
	// 64-byte vector: shuffle the 128-bit lanes with selectors 1, 2, 2, 3
	// and keep the low 256 bits of the result.
	const auto __intrin = __to_intrin(__x);
	if constexpr (is_integral_v<_Tp>)
	  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
	    _mm512_shuffle_i32x4(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else if constexpr (sizeof(_Tp) == 4)
	  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
	    _mm512_shuffle_f32x4(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else if constexpr (sizeof(_Tp) == 8)
	  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
	    _mm512_shuffle_f64x2(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else
	  __assert_unreachable<_Tp>();
      }
    else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
      // 32-byte floating-point vector, viewed as 4 doubles: combine the upper
      // double of the low half with the lower double of the high half.
      return __vector_bitcast<_Tp>(
	_mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
		       __hi128(__vector_bitcast<double>(__x)), 1));
    else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
      // other 32-byte objects: byte-wise shift of the high:low pair by a
      // quarter of the used bytes
      return __vector_bitcast<_Tp>(
	_mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
			__lo128(__vector_bitcast<_LLong>(__x)),
			sizeof(_Tp) * _Np / 4));
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
	// generic fallback: copy _Np / 2 elements, starting one quarter into
	// the object
	__vector_type_t<_Tp, _Np / 2> __r;
	__builtin_memcpy(&__r,
			 reinterpret_cast<const char*>(&__x)
			   + sizeof(_Tp) * _Np / 4,
			 sizeof(_Tp) * _Np / 2);
	return __r;
      }
  }
4075 
4076 template <typename _Tp, typename _A0, typename... _As>
4077  _GLIBCXX_SIMD_INTRINSIC
4078  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
4079  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
4080  {
4081  if constexpr (sizeof...(_As) == 0)
4082  return __extract_center(__x.first);
4083  else
4084  return __extract_part<1, 4, 2>(__x);
4085  }
4086 
4087 // }}}
4088 // __split_wrapper {{{
4089 template <size_t... _Sizes, typename _Tp, typename... _As>
4090  auto
4091  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
4092  {
4093  return split<_Sizes...>(
4094  fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
4095  __x));
4096  }
4097 
4098 // }}}
4099 
4100 // split<simd>(simd) {{{
// Splits __x into an array of _Parts objects of type _V, where part i holds
// the elements [i * _V::size(), (i + 1) * _V::size()) of __x. Only
// participates in overload resolution if _V is a simd type and the size of
// __x is an exact multiple of _V::size().
template <typename _V, typename _Ap,
	  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
		&& is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
  {
    using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
      {
	// nothing to split, only the type changes
	return {simd_cast<_V>(__x)};
      }
    else if (__x._M_is_constprop())
      {
	// __x reports itself as constant-propagated: build every part from
	// element-wise subscripts
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
			     { return __x[__i * _V::size() + __j]; });
		 });
      }
    else if constexpr (
      __is_fixed_size_abi_v<_Ap>
      && (is_same_v<typename _V::abi_type, simd_abi::scalar>
	    || (__is_fixed_size_abi_v<typename _V::abi_type>
		  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
		  )))
      {
	// fixed_size -> fixed_size (w/o padding) or scalar
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
	// load every part from the storage of __x through a may_alias pointer
	const __may_alias<_Tp>* const __element_ptr
	  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
		 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
#else
	// build every part element by element, using compile-time indexes
	// into the data member of __x
	const auto& __xx = __data(__x);
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   [[maybe_unused]] constexpr size_t __offset
		     = decltype(__i)::value * _V::size();
		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			    constexpr _SizeConstant<__j + __offset> __k;
			    return __xx[__k];
			  });
		 });
#endif
      }
    else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
      {
	// scalar parts from a non-fixed_size source: one element per part
	// normally memcpy should work here as well
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
      }
    else
      {
	// remaining cases: fixed_size parts are generated element-wise, all
	// other parts are extracted from the data member with __extract_part
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
		     return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			      return __x[__i * _V::size() + __j];
			    });
		   else
		     return _V(__private_init,
			       __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
		 });
      }
  }
4167 
4168 // }}}
4169 // split<simd_mask>(simd_mask) {{{
// Splits the mask __x into an array of _Parts masks of type _V, where part i
// covers the elements [i * _V::size(), (i + 1) * _V::size()) of __x. Only
// participates in overload resolution if _V is a simd_mask type and the size
// of __x is an exact multiple of _V::size().
template <typename _V, typename _Ap,
	  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
    _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  {
    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
      // same ABI: the argument already is the single part
      return {__x};
    else if constexpr (_Parts == 1)
      // single part with a different ABI: only a cast is needed
      return {__proposed::static_simd_cast<_V>(__x)};
    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
			 && __is_avx_abi<_Ap>())
      // AVX mask into two SSE masks: take the low and high 128-bit halves
      return {_V(__private_init, __lo128(__data(__x))),
	      _V(__private_init, __hi128(__data(__x)))};
    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
      {
	// each part fits into an unsigned long long: go through the bitset
	// representation and shift the bits of part i down to position 0
	const bitset __bits = __x.__to_bitset();
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   constexpr size_t __offset = __i * _V::size();
		   return _V(__bitset_init, (__bits >> __offset).to_ullong());
		 });
      }
    else
      {
	// fallback: generate every part element by element
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   constexpr size_t __offset = __i * _V::size();
		   return _V(__private_init,
			     [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			       return __x[__j + __offset];
			     });
		 });
      }
  }
4205 
4206 // }}}
4207 // split<_Sizes...>(simd) {{{
// Splits __x into a tuple of simd objects with the sizes _Sizes..., in order;
// the ABI of every part is deduced from its size. Part i starts at the
// element offset given by the sum of the preceding sizes.
template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>& __x)
  {
    using _SL = _SizeList<_Sizes...>;
    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
    constexpr size_t _N0 = _SL::template _S_at<0>();
    using _V = __deduced_simd<_Tp, _N0>; // type of the first part

    if (__x._M_is_constprop())
      // __x reports itself as constant-propagated: build every part from
      // element-wise subscripts
      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
		 constexpr size_t __offset = _SL::_S_before(__i);
		 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			  return __x[__offset + __j];
			});
	       });
    else if constexpr (_Np == _N0)
      {
	// the first part already covers all of __x
	static_assert(sizeof...(_Sizes) == 1);
	return {simd_cast<_V>(__x)};
      }
    else if constexpr // split from fixed_size, such that __x::first.size == _N0
      (__is_fixed_size_abi_v<
	 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
      {
	static_assert(
	  !__is_fixed_size_abi_v<typename _V::abi_type>,
	  "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
	  "fixed_size_simd "
	  "when deduced?");
	// extract first and recurse (__split_wrapper is needed to deduce a new
	// _Sizes pack)
	return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
			 __split_wrapper(_SL::template _S_pop_front<1>(),
					 __data(__x).second));
      }
    else if constexpr ((!__is_fixed_size_abi_v<simd_abi::deduce_t<_Tp, _Sizes>> && ...))
      {
	// none of the parts is fixed_size: extract every part from the data
	// member with __extract_part
	constexpr array<size_t, sizeof...(_Sizes)> __size = {_Sizes...};
	return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
		 [&](auto __i) constexpr {
		   // element offset of part __i = sum of all preceding sizes
		   constexpr size_t __offset = [&]() {
		     size_t __r = 0;
		     for (unsigned __j = 0; __j < __i; ++__j)
		       __r += __size[__j];
		     return __r;
		   }();
		   return __deduced_simd<_Tp, __size[__i]>(
			    __private_init,
			    __extract_part<__offset, _Np, __size[__i]>(__data(__x)));
		 });
      }
    // Generic fallback, reached when none of the branches above returned.
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
    // load every part from the storage of __x through a may_alias pointer
    const __may_alias<_Tp>* const __element_ptr
      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	       constexpr size_t __offset = _SL::_S_before(__i);
	       // derive the alignment passed to the load from the alignment
	       // of the whole object and the byte offset of this part
	       constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
	       constexpr size_t __a
		 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
	       constexpr size_t __b = ((__a - 1) & __a) ^ __a;
	       constexpr size_t __alignment = __b == 0 ? __a : __b;
	       return _Vi(__element_ptr + __offset, overaligned<__alignment>);
	     });
#else
    // build every part element by element, using compile-time indexes into
    // the data member of __x
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	       const auto& __xx = __data(__x);
	       using _Offset = decltype(_SL::_S_before(__i));
	       return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			constexpr _SizeConstant<_Offset::value + __j> __k;
			return __xx[__k];
		      });
	     });
#endif
  }
4291 
4292 // }}}
4293 
4294 // __subscript_in_pack {{{
4295 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4296  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4297  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4298  {
4299  if constexpr (_I < simd_size_v<_Tp, _Ap>)
4300  return __x[_I];
4301  else
4302  return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4303  }
4304 
4305 // }}}
4306 // __store_pack_of_simd {{{
4307 template <typename _Tp, typename _A0, typename... _As>
4308  _GLIBCXX_SIMD_INTRINSIC void
4309  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4310  {
4311  constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4312  __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4313  if constexpr (sizeof...(__xs) > 0)
4314  __store_pack_of_simd(__mem + __n_bytes, __xs...);
4315  }
4316 
4317 // }}}
4318 // concat(simd...) {{{
// Concatenates the elements of all arguments, in order, into a single simd
// whose size is the sum of the argument sizes and whose ABI is deduced from
// that size.
template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
  inline _GLIBCXX_SIMD_CONSTEXPR
  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  concat(const simd<_Tp, _As>&... __xs)
  {
    constexpr int _Np = (simd_size_v<_Tp, _As> + ...);
    using _Abi = simd_abi::deduce_t<_Tp, _Np>;
    using _Rp = simd<_Tp, _Abi>;
    using _RW = typename _SimdTraits<_Tp, _Abi>::_SimdMember;
    if constexpr (sizeof...(__xs) == 1)
      // single argument: only the type changes
      return simd_cast<_Rp>(__xs...);
    else if ((... && __xs._M_is_constprop()))
      // all arguments report themselves as constant-propagated: generate the
      // result element by element
      return _Rp([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
		 { return __subscript_in_pack<__i>(__xs...); });
    else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 2)
      {
	// two arguments, result stored in a _SimdWrapper: a single shuffle of
	// the two input vectors
	return {__private_init,
		__vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
			      [](int __i) {
				constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
				constexpr int __vsizes[2]
				  = {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...};
				// number of unused elements at the end of the first vector
				constexpr int __padding0 = __vsizes[0] - __sizes[0];
				// -1 for indexes past the result size; indexes
				// into the second vector skip the padding of
				// the first one
				return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
			      })};
      }
    else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 3)
      // three arguments: reduce to two nested two-argument calls
      return [](const auto& __x0, const auto& __x1, const auto& __x2)
	       _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 return concat(concat(__x0, __x1), __x2);
	       }(__xs...);
    else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) > 3)
      // more than three arguments: concatenate the first two and the rest
      // separately, then join both results
      return [](const auto& __x0, const auto& __x1, const auto&... __rest)
	       _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 return concat(concat(__x0, __x1), concat(__rest...));
	       }(__xs...);
    else
      {
	// fallback: copy the bytes of all arguments back to back into the
	// storage of a value-initialized result
	_Rp __r{};
	__store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
	return __r;
      }
  }
4362 
4363 // }}}
4364 // concat(array<simd>) {{{
4365 template <typename _Tp, typename _Abi, size_t _Np>
4366  _GLIBCXX_SIMD_ALWAYS_INLINE
4367  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4368  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4369  {
4370  return __call_with_subscripts<_Np>(
4371  __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4372  return concat(__xs...);
4373  });
4374  }
4375 
4376 // }}}
4377 
4378 /// @cond undocumented
4379 // _SmartReference {{{
// Proxy object standing in for a reference to one element of _M_obj.
//
// Reads subscript the referenced object with the stored index (or return the
// object itself if _Up is an arithmetic type); writes are forwarded to
// _Accessor::_S_set(object, index, value). All modifying operators are
// rvalue-qualified and the copy constructor is deleted, so modifications are
// only possible through a temporary proxy.
template <typename _Up, typename _Accessor = _Up,
	  typename _ValueType = typename _Up::value_type>
  class _SmartReference
  {
    friend _Accessor;
    int _M_index; // element position inside _M_obj
    _Up& _M_obj;  // the object holding the referenced element

    // Reads the current value of the referenced element.
    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
    _M_read() const noexcept
    {
      if constexpr (is_arithmetic_v<_Up>)
	return _M_obj;
      else
	return _M_obj[_M_index];
    }

    // Stores __x into the referenced element via the accessor.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_write(_Tp&& __x) const
      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }

  public:
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SmartReference(_Up& __o, int __i) noexcept
    : _M_index(__i), _M_obj(__o) {}

    using value_type = _ValueType;

    _GLIBCXX_SIMD_INTRINSIC
    _SmartReference(const _SmartReference&) = delete;

    // Implicit read access.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator value_type() const noexcept
    { return _M_read(); }

    // Assignment; constrained via _ValuePreservingOrInt on the source type.
    // Returns a new proxy for the same element.
    template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator=(_Tp&& __x) &&
      {
	_M_write(static_cast<_Tp&&>(__x));
	return {_M_obj, _M_index};
      }

    // Generates the compound assignment operator `__op=`: read the element,
    // apply __op with __x, write the result back and return a new proxy for
    // the same element. The operator is constrained via _ValuePreservingOrInt
    // on both the operand type and the type of the intermediate result.
#define _GLIBCXX_SIMD_OP_(__op)                                                   \
    template <typename _Tp,                                                       \
	      typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
	      typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>,       \
	      typename = _ValuePreservingOrInt<_TT, value_type>>                  \
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference                           \
      operator __op##=(_Tp&& __x) &&                                              \
      {                                                                           \
	const value_type& __lhs = _M_read();                                      \
	_M_write(__lhs __op __x);                                                 \
	return {_M_obj, _M_index};                                                \
      }
    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
#undef _GLIBCXX_SIMD_OP_

    // Increment and decrement operators. The defaulted template parameter
    // makes the decltype expression dependent, so each operator only exists
    // if value_type supports the corresponding operation. The prefix forms
    // return a new proxy, the postfix forms return the previous value.
    template <typename _Tp = void,
	      typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator++() &&
      {
	value_type __x = _M_read();
	_M_write(++__x);
	return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator++(int) &&
      {
	const value_type __r = _M_read();
	value_type __x = __r;
	_M_write(++__x);
	return __r;
      }

    template <typename _Tp = void,
	      typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator--() &&
      {
	value_type __x = _M_read();
	_M_write(--__x);
	return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator--(int) &&
      {
	const value_type __r = _M_read();
	value_type __x = __r;
	_M_write(--__x);
	return __r;
      }

    // swap overloads: proxy <-> proxy, value <-> proxy and proxy <-> value.
    // The values are exchanged through a temporary of value_type.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, _SmartReference&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp = static_cast<_SmartReference&&>(__a);
      static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(value_type& __a, _SmartReference&& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, value_type&&>,
	is_nothrow_assignable<value_type&, value_type&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(std::move(__a));
      __a = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, value_type& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, _SmartReference&&>,
	is_nothrow_assignable<value_type&, value_type&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(__a);
      static_cast<_SmartReference&&>(__a) = std::move(__b);
      __b = std::move(__tmp);
    }
  };
4518 
4519 // }}}
4520 // __scalar_abi_wrapper {{{
4521 template <int _Bytes>
4522  struct __scalar_abi_wrapper
4523  {
4524  template <typename _Tp> static constexpr size_t _S_full_size = 1;
4525  template <typename _Tp> static constexpr size_t _S_size = 1;
4526  template <typename _Tp> static constexpr size_t _S_is_partial = false;
4527 
4528  template <typename _Tp, typename _Abi = simd_abi::scalar>
4529  static constexpr bool _S_is_valid_v
4530  = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4531  };
4532 
4533 // }}}
4534 // __decay_abi metafunction {{{
// Maps an ABI tag as used inside _AbiList to the tag that is handed out:
// every type maps to itself ...
template <typename _Tp>
  struct __decay_abi { using type = _Tp; };

// ... except __scalar_abi_wrapper<_Bytes>, which maps to simd_abi::scalar.
template <int _Bytes>
  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  { using type = simd_abi::scalar; };
4541 
4542 // }}}
4543 // __find_next_valid_abi metafunction {{{1
4544 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4545 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4546 // recursion at 2 elements in the resulting ABI tag. In this case
4547 // type::_S_is_valid_v<_Tp> may be false.
// `type` is the ABI tag selected by _S_choose() (only its return type is
// used, via decltype).
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    static constexpr auto
    _S_choose()
    {
      // next candidate: half of _Bytes rounded up to a power of 2
      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
      using _NextAbi = _Abi<_NextBytes>;
      // The checks are ordered: _NextAbi is only inspected if the candidate
      // still holds at least two elements.
      if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
	return _Abi<_Bytes>();
      else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
			   && _NextAbi::template _S_is_valid_v<_Tp>)
	return _NextAbi();
      else
	// candidate rejected: continue the search below _NextBytes
	return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
    }

    using type = decltype(_S_choose());
  };
4567 
// For the scalar wrapper there is nothing to search: the result is always
// simd_abi::scalar.
template <int _Bytes, typename _Tp>
  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  { using type = simd_abi::scalar; };
4571 
4572 // _AbiList {{{1
// List of ABI tag templates. The primary template is the end of the
// recursion performed by the partial specialization for non-empty lists: it
// reports that no valid ABI exists and yields void for both ABI queries.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int> static constexpr bool _S_has_valid_abi = false;
    template <typename, int> using _FirstValidAbi = void;
    template <typename, int> using _BestAbi = void;
  };
4580 
// Non-empty list: _A0 is the candidate with the highest priority, _Rest...
// are tried afterwards. Every candidate is instantiated with the number of
// bytes needed for _Np elements of _Tp.
template <template <int> class _A0, template <int> class... _Rest>
  struct _AbiList<_A0, _Rest...>
  {
    // true if any ABI of the list is valid for _Np elements of _Tp
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
	= _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
	    _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

    // the first ABI of the list that is valid for _Np elements of _Tp (passed
    // through __decay_abi); void if there is none
    template <typename _Tp, int _Np>
      using _FirstValidAbi = conditional_t<
	_A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
	typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
	typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

    // Selects the ABI tag behind _BestAbi; only the return type is used.
    template <typename _Tp, int _Np>
      static constexpr auto
      _S_determine_best_abi()
      {
	static_assert(_Np >= 1);
	constexpr int _Bytes = sizeof(_Tp) * _Np;
	if constexpr (_Np == 1)
	  // a single element always uses the scalar ABI
	  return __make_dependent_t<_Tp, simd_abi::scalar>{};
	else
	  {
	    constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
	    // _A0<_Bytes> is good if:
	    // 1. The ABI tag is valid for _Tp
	    // 2. The storage overhead is no more than padding to fill the next
	    //    power-of-2 number of bytes
	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<
			    _Tp> && __fullsize / 2 < _Np)
	      return typename __decay_abi<_A0<_Bytes>>::type{};
	    else
	      {
		// otherwise look for a smaller variant of _A0 that is valid
		// and holds no more than _Np elements; if there is none,
		// continue with the rest of the list
		using _Bp =
		  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
		if constexpr (_Bp::template _S_is_valid_v<
				_Tp> && _Bp::template _S_size<_Tp> <= _Np)
		  return _Bp{};
		else
		  return
		    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
	      }
	  }
      }

    template <typename _Tp, int _Np>
      using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
  };
4630 
4631 // }}}1
4632 
// The following alias lists all native ABIs, which makes them accessible to
// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
// matters: whatever comes first has higher priority. The scalar ABI (via
// __scalar_abi_wrapper) is the last entry.
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
				__scalar_abi_wrapper>;
4638 
4639 // valid _SimdTraits specialization {{{1
// Selected whenever _Abi::template _IsValid<_Tp> names a type (void_t
// detection); the traits are then inherited from _Abi::__traits<_Tp>.
template <typename _Tp, typename _Abi>
  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp> {};
4643 
4644 // __deduce_impl specializations {{{1
4645 // try all native ABIs (including scalar) first
// Enabled if any ABI in _AllNativeAbis is valid for _Np elements of _Tp; the
// deduced ABI is then the first valid entry of that list.
template <typename _Tp, size_t _Np>
  struct __deduce_impl<
    _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4650 
4651 // fall back to fixed_size only if scalar and native ABIs don't match
// Primary template: no member `type`, i.e. nothing can be deduced.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_fixed_size_fallback {};

// If simd_abi::fixed_size<_Np> is valid for _Tp, that ABI is the result.
template <typename _Tp, size_t _Np>
  struct __deduce_fixed_size_fallback<_Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
  { using type = simd_abi::fixed_size<_Np>; };
4659 
// Primary template, used when the native-ABI specialization is not enabled:
// inherits the (possibly missing) member `type` from the fixed_size fallback.
template <typename _Tp, size_t _Np, typename>
  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4662 
4663 //}}}1
4664 /// @endcond
4665 
4666 // simd_mask {{{
4667 template <typename _Tp, typename _Abi>
4668  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4669  {
4670  // types, tags, and friends {{{
4671  using _Traits = _SimdTraits<_Tp, _Abi>;
4672  using _MemberType = typename _Traits::_MaskMember;
4673 
4674  // We map all masks with equal element sizeof to a single integer type, the
4675  // one given by __int_for_sizeof_t<_Tp>. This is the approach
4676  // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4677  // template specializations in the implementation classes.
4678  using _Ip = __int_for_sizeof_t<_Tp>;
4679  static constexpr _Ip* _S_type_tag = nullptr;
4680 
4681  friend typename _Traits::_MaskBase;
4682  friend class simd<_Tp, _Abi>; // to construct masks on return
4683  friend typename _Traits::_SimdImpl; // to construct masks on return and
4684  // inspect data on masked operations
4685  public:
4686  using _Impl = typename _Traits::_MaskImpl;
4687  friend _Impl;
4688 
4689  // }}}
4690  // member types {{{
4691  using value_type = bool;
4692  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4693  using simd_type = simd<_Tp, _Abi>;
4694  using abi_type = _Abi;
4695 
4696  // }}}
4697  static constexpr size_t size() // {{{
4698  { return __size_or_zero_v<_Tp, _Abi>; }
4699 
4700  // }}}
4701  // constructors & assignment {{{
4702  simd_mask() = default;
4703  simd_mask(const simd_mask&) = default;
4704  simd_mask(simd_mask&&) = default;
4705  simd_mask& operator=(const simd_mask&) = default;
4706  simd_mask& operator=(simd_mask&&) = default;
4707 
4708  // }}}
4709  // access to internal representation (optional feature) {{{
4710  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4711  simd_mask(typename _Traits::_MaskCastType __init)
4712  : _M_data{__init} {}
4713  // conversions to internal type is done in _MaskBase
4714 
4715  // }}}
4716  // bitset interface (extension to be proposed) {{{
4717  // TS_FEEDBACK:
4718  // Conversion of simd_mask to and from bitset makes it much easier to
4719  // interface with other facilities. I suggest adding `static
4720  // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4721  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4722  __from_bitset(bitset<size()> bs)
4723  { return {__bitset_init, bs}; }
4724 
4725  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4726  __to_bitset() const
4727  { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4728 
4729  // }}}
4730  // explicit broadcast constructor {{{
4731  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4732  simd_mask(value_type __x)
4733  : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4734 
4735  // }}}
4736  // implicit type conversion constructor {{{
4737  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4738  // proposed improvement
4739  template <typename _Up, typename _A2,
4740  typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4741  _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4742  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4743  simd_mask(const simd_mask<_Up, _A2>& __x)
4744  : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4745  #else
4746  // conforming to ISO/IEC 19570:2018
4747  template <typename _Up, typename = enable_if_t<conjunction<
4748  is_same<abi_type, simd_abi::fixed_size<size()>>,
4749  is_same<_Up, _Up>>::value>>
4750  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4751  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4752  : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4753  #endif
4754 
4755  // }}}
4756  // load constructor {{{
4757  template <typename _Flags>
4758  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4759  simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4760  : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4761 
4762  template <typename _Flags>
4763  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4764  simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4765  : _M_data{}
4766  {
4767  _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4768  _Flags::template _S_apply<simd_mask>(__mem));
4769  }
4770 
4771  // }}}
4772  // loads [simd_mask.load] {{{
4773  template <typename _Flags>
4774  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4775  copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4776  { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4777 
4778  // }}}
4779  // stores [simd_mask.store] {{{
4780  template <typename _Flags>
4781  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4782  copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4783  { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4784 
4785  // }}}
4786  // scalar access {{{
4787  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4788  operator[](size_t __i)
4789  {
4790  if (__i >= size())
4791  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4792  return {_M_data, int(__i)};
4793  }
4794 
4795  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4796  operator[](size_t __i) const
4797  {
4798  if (__i >= size())
4799  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4800  if constexpr (__is_scalar_abi<_Abi>())
4801  return _M_data;
4802  else
4803  return static_cast<bool>(_M_data[__i]);
4804  }
4805 
4806  // }}}
4807  // negation {{{
4808  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4809  operator!() const
4810  { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4811 
4812  // }}}
4813  // simd_mask binary operators [simd_mask.binary] {{{
4814  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4815  // simd_mask<int> && simd_mask<uint> needs disambiguation
4816  template <typename _Up, typename _A2,
4817  typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4818  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4819  operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4820  {
4821  return {__private_init,
4822  _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4823  }
4824 
4825  template <typename _Up, typename _A2,
4826  typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4827  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4828  operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4829  {
4830  return {__private_init,
4831  _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4832  }
4833  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4834 
4835  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4836  operator&&(const simd_mask& __x, const simd_mask& __y)
4837  { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4838 
4839  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4840  operator||(const simd_mask& __x, const simd_mask& __y)
4841  { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4842 
4843  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4844  operator&(const simd_mask& __x, const simd_mask& __y)
4845  { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4846 
4847  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4848  operator|(const simd_mask& __x, const simd_mask& __y)
4849  { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4850 
4851  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4852  operator^(const simd_mask& __x, const simd_mask& __y)
4853  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4854 
4855  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4856  operator&=(simd_mask& __x, const simd_mask& __y)
4857  {
4858  __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4859  return __x;
4860  }
4861 
4862  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4863  operator|=(simd_mask& __x, const simd_mask& __y)
4864  {
4865  __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4866  return __x;
4867  }
4868 
4869  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4870  operator^=(simd_mask& __x, const simd_mask& __y)
4871  {
4872  __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4873  return __x;
4874  }
4875 
4876  // }}}
4877  // simd_mask compares [simd_mask.comparison] {{{
4878  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4879  operator==(const simd_mask& __x, const simd_mask& __y)
4880  { return !operator!=(__x, __y); }
4881 
4882  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4883  operator!=(const simd_mask& __x, const simd_mask& __y)
4884  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4885 
4886  // }}}
4887  // private_init ctor {{{
4888  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4889  simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4890  : _M_data(__init) {}
4891 
4892  // }}}
4893  // private_init generator ctor {{{
4894  template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4895  _GLIBCXX_SIMD_INTRINSIC constexpr
4896  simd_mask(_PrivateInit, _Fp&& __gen)
4897  : _M_data()
4898  {
4899  __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4900  _Impl::_S_set(_M_data, __i, __gen(__i));
4901  });
4902  }
4903 
4904  // }}}
4905  // bitset_init ctor {{{
4906  _GLIBCXX_SIMD_INTRINSIC constexpr
4907  simd_mask(_BitsetInit, bitset<size()> __init)
4908  : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4909  {}
4910 
4911  // }}}
4912  // __cvt {{{
4913  // TS_FEEDBACK:
4914  // The conversion operator this implements should be a ctor on simd_mask.
4915  // Once you call .__cvt() on a simd_mask it converts conveniently.
4916  // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4917  struct _CvtProxy
4918  {
4919  template <typename _Up, typename _A2,
4920  typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4921  _GLIBCXX_SIMD_ALWAYS_INLINE
4922  operator simd_mask<_Up, _A2>() &&
4923  {
4924  using namespace std::experimental::__proposed;
4925  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4926  }
4927 
4928  const simd_mask<_Tp, _Abi>& _M_data;
4929  };
4930 
4931  _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4932  __cvt() const
4933  { return {*this}; }
4934 
4935  // }}}
4936  // operator?: overloads (suggested extension) {{{
4937  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4938  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4939  operator?:(const simd_mask& __k, const simd_mask& __where_true,
4940  const simd_mask& __where_false)
4941  {
4942  auto __ret = __where_false;
4943  _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4944  return __ret;
4945  }
4946 
4947  template <typename _U1, typename _U2,
4948  typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4949  typename = enable_if_t<conjunction_v<
4950  is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4951  is_convertible<simd_mask, typename _Rp::mask_type>>>>
4952  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4953  operator?:(const simd_mask& __k, const _U1& __where_true,
4954  const _U2& __where_false)
4955  {
4956  _Rp __ret = __where_false;
4957  _Rp::_Impl::_S_masked_assign(
4958  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4959  __data(static_cast<_Rp>(__where_true)));
4960  return __ret;
4961  }
4962 
4963  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4964  template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4965  typename = enable_if_t<
4966  conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4967  is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4968  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4969  operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4970  const simd_mask<_Up, _Au>& __where_false)
4971  {
4972  simd_mask __ret = __where_false;
4973  _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4974  __where_true._M_data);
4975  return __ret;
4976  }
4977  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4978  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4979 
4980  // }}}
4981  // _M_is_constprop {{{
4982  _GLIBCXX_SIMD_INTRINSIC constexpr bool
4983  _M_is_constprop() const
4984  {
4985  if constexpr (__is_scalar_abi<_Abi>())
4986  return __builtin_constant_p(_M_data);
4987  else
4988  return _M_data._M_is_constprop();
4989  }
4990 
4991  // }}}
4992 
4993  private:
4994  friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4995  friend auto& __data<_Tp, abi_type>(simd_mask&);
4996  alignas(_Traits::_S_mask_align) _MemberType _M_data;
4997  };
4998 
4999 // }}}
5000 
5001 /// @cond undocumented
5002 // __data(simd_mask) {{{
5003 template <typename _Tp, typename _Ap>
5004  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5005  __data(const simd_mask<_Tp, _Ap>& __x)
5006  { return __x._M_data; }
5007 
5008 template <typename _Tp, typename _Ap>
5009  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5010  __data(simd_mask<_Tp, _Ap>& __x)
5011  { return __x._M_data; }
5012 
5013 // }}}
5014 /// @endcond
5015 
5016 // simd_mask reductions [simd_mask.reductions] {{{
5017 template <typename _Tp, typename _Abi>
5018  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5019  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5020  {
5021  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5022  {
5023  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5024  if (!__k[__i])
5025  return false;
5026  return true;
5027  }
5028  else
5029  return _Abi::_MaskImpl::_S_all_of(__k);
5030  }
5031 
5032 template <typename _Tp, typename _Abi>
5033  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5034  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5035  {
5036  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5037  {
5038  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5039  if (__k[__i])
5040  return true;
5041  return false;
5042  }
5043  else
5044  return _Abi::_MaskImpl::_S_any_of(__k);
5045  }
5046 
5047 template <typename _Tp, typename _Abi>
5048  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5049  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5050  {
5051  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5052  {
5053  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5054  if (__k[__i])
5055  return false;
5056  return true;
5057  }
5058  else
5059  return _Abi::_MaskImpl::_S_none_of(__k);
5060  }
5061 
5062 template <typename _Tp, typename _Abi>
5063  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5064  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5065  {
5066  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5067  {
5068  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
5069  if (__k[__i] != __k[__i - 1])
5070  return true;
5071  return false;
5072  }
5073  else
5074  return _Abi::_MaskImpl::_S_some_of(__k);
5075  }
5076 
5077 template <typename _Tp, typename _Abi>
5078  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5079  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
5080  {
5081  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5082  {
5083  const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
5084  __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5085  return ((__elements != 0) + ...);
5086  });
5087  if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
5088  return __r;
5089  }
5090  return _Abi::_MaskImpl::_S_popcount(__k);
5091  }
5092 
5093 template <typename _Tp, typename _Abi>
5094  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5095  find_first_set(const simd_mask<_Tp, _Abi>& __k)
5096  {
5097  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5098  {
5099  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5100  const size_t _Idx = __call_with_n_evaluations<_Np>(
5101  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5102  return std::min({__indexes...});
5103  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5104  return __k[__i] ? +__i : _Np;
5105  });
5106  if (_Idx >= _Np)
5107  __invoke_ub("find_first_set(empty mask) is UB");
5108  if (__builtin_constant_p(_Idx))
5109  return _Idx;
5110  }
5111  return _Abi::_MaskImpl::_S_find_first_set(__k);
5112  }
5113 
5114 template <typename _Tp, typename _Abi>
5115  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5116  find_last_set(const simd_mask<_Tp, _Abi>& __k)
5117  {
5118  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5119  {
5120  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5121  const int _Idx = __call_with_n_evaluations<_Np>(
5122  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5123  return std::max({__indexes...});
5124  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5125  return __k[__i] ? int(__i) : -1;
5126  });
5127  if (_Idx < 0)
5128  __invoke_ub("find_first_set(empty mask) is UB");
5129  if (__builtin_constant_p(_Idx))
5130  return _Idx;
5131  }
5132  return _Abi::_MaskImpl::_S_find_last_set(__k);
5133  }
5134 
5135 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5136 all_of(_ExactBool __x) noexcept
5137 { return __x; }
5138 
5139 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5140 any_of(_ExactBool __x) noexcept
5141 { return __x; }
5142 
5143 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5144 none_of(_ExactBool __x) noexcept
5145 { return !__x; }
5146 
5147 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5148 some_of(_ExactBool) noexcept
5149 { return false; }
5150 
5151 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5152 popcount(_ExactBool __x) noexcept
5153 { return __x; }
5154 
5155 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5156 find_first_set(_ExactBool)
5157 { return 0; }
5158 
5159 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5160 find_last_set(_ExactBool)
5161 { return 0; }
5162 
5163 // }}}
5164 
5165 /// @cond undocumented
5166 // _SimdIntOperators{{{1
// Primary template: an empty class, i.e. no additional operators are provided.
// The operators are defined by the specialization for `true`.
template <typename _V, typename _Tp, typename _Abi, bool>
  class _SimdIntOperators
  {};
5169 
5170 template <typename _V, typename _Tp, typename _Abi>
5171  class _SimdIntOperators<_V, _Tp, _Abi, true>
5172  {
5173  using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5174 
5175  _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5176  __derived() const
5177  { return *static_cast<const _V*>(this); }
5178 
5179  template <typename _Up>
5180  _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5181  _S_make_derived(_Up&& __d)
5182  { return {__private_init, static_cast<_Up&&>(__d)}; }
5183 
5184  public:
5185  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5186  _V&
5187  operator%=(_V& __lhs, const _V& __x)
5188  { return __lhs = __lhs % __x; }
5189 
5190  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5191  _V&
5192  operator&=(_V& __lhs, const _V& __x)
5193  { return __lhs = __lhs & __x; }
5194 
5195  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5196  _V&
5197  operator|=(_V& __lhs, const _V& __x)
5198  { return __lhs = __lhs | __x; }
5199 
5200  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5201  _V&
5202  operator^=(_V& __lhs, const _V& __x)
5203  { return __lhs = __lhs ^ __x; }
5204 
5205  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5206  _V&
5207  operator<<=(_V& __lhs, const _V& __x)
5208  { return __lhs = __lhs << __x; }
5209 
5210  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5211  _V&
5212  operator>>=(_V& __lhs, const _V& __x)
5213  { return __lhs = __lhs >> __x; }
5214 
5215  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5216  _V&
5217  operator<<=(_V& __lhs, int __x)
5218  { return __lhs = __lhs << __x; }
5219 
5220  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5221  _V&
5222  operator>>=(_V& __lhs, int __x)
5223  { return __lhs = __lhs >> __x; }
5224 
5225  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5226  _V
5227  operator%(const _V& __x, const _V& __y)
5228  {
5229  return _SimdIntOperators::_S_make_derived(
5230  _Impl::_S_modulus(__data(__x), __data(__y)));
5231  }
5232 
5233  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5234  _V
5235  operator&(const _V& __x, const _V& __y)
5236  {
5237  return _SimdIntOperators::_S_make_derived(
5238  _Impl::_S_bit_and(__data(__x), __data(__y)));
5239  }
5240 
5241  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5242  _V
5243  operator|(const _V& __x, const _V& __y)
5244  {
5245  return _SimdIntOperators::_S_make_derived(
5246  _Impl::_S_bit_or(__data(__x), __data(__y)));
5247  }
5248 
5249  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5250  _V
5251  operator^(const _V& __x, const _V& __y)
5252  {
5253  return _SimdIntOperators::_S_make_derived(
5254  _Impl::_S_bit_xor(__data(__x), __data(__y)));
5255  }
5256 
5257  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5258  _V
5259  operator<<(const _V& __x, const _V& __y)
5260  {
5261  return _SimdIntOperators::_S_make_derived(
5262  _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5263  }
5264 
5265  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5266  _V
5267  operator>>(const _V& __x, const _V& __y)
5268  {
5269  return _SimdIntOperators::_S_make_derived(
5270  _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5271  }
5272 
5273  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5274  _V
5275  operator<<(const _V& __x, int __y)
5276  {
5277  if (__y < 0)
5278  __invoke_ub("The behavior is undefined if the right operand of a "
5279  "shift operation is negative. [expr.shift]\nA shift by "
5280  "%d was requested",
5281  __y);
5282  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5283  __invoke_ub(
5284  "The behavior is undefined if the right operand of a "
5285  "shift operation is greater than or equal to the width of the "
5286  "promoted left operand. [expr.shift]\nA shift by %d was requested",
5287  __y);
5288  return _SimdIntOperators::_S_make_derived(
5289  _Impl::_S_bit_shift_left(__data(__x), __y));
5290  }
5291 
5292  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5293  _V
5294  operator>>(const _V& __x, int __y)
5295  {
5296  if (__y < 0)
5297  __invoke_ub(
5298  "The behavior is undefined if the right operand of a shift "
5299  "operation is negative. [expr.shift]\nA shift by %d was requested",
5300  __y);
5301  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5302  __invoke_ub(
5303  "The behavior is undefined if the right operand of a shift "
5304  "operation is greater than or equal to the width of the promoted "
5305  "left operand. [expr.shift]\nA shift by %d was requested",
5306  __y);
5307  return _SimdIntOperators::_S_make_derived(
5308  _Impl::_S_bit_shift_right(__data(__x), __y));
5309  }
5310 
5311  // unary operators (for integral _Tp)
5312  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5313  _V
5314  operator~() const
5315  { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5316  };
5317 
5318 //}}}1
5319 /// @endcond
5320 
5321 // simd {{{
5322 template <typename _Tp, typename _Abi>
5323  class simd : public _SimdIntOperators<
5324  simd<_Tp, _Abi>, _Tp, _Abi,
5325  conjunction<is_integral<_Tp>,
5326  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5327  public _SimdTraits<_Tp, _Abi>::_SimdBase
5328  {
5329  using _Traits = _SimdTraits<_Tp, _Abi>;
5330  using _MemberType = typename _Traits::_SimdMember;
5331  using _CastType = typename _Traits::_SimdCastType;
5332  static constexpr _Tp* _S_type_tag = nullptr;
5333  friend typename _Traits::_SimdBase;
5334 
5335  public:
5336  using _Impl = typename _Traits::_SimdImpl;
5337  friend _Impl;
5338  friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5339 
5340  using value_type = _Tp;
5341  using reference = _SmartReference<_MemberType, _Impl, value_type>;
5342  using mask_type = simd_mask<_Tp, _Abi>;
5343  using abi_type = _Abi;
5344 
5345  static constexpr size_t size()
5346  { return __size_or_zero_v<_Tp, _Abi>; }
5347 
5348  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5349  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5350  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5351  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5352  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5353 
5354  // implicit broadcast constructor
5355  template <typename _Up,
5356  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5357  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5358  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5359  : _M_data(
5360  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5361  {}
5362 
5363  // implicit type conversion constructor (convert from fixed_size to
5364  // fixed_size)
5365  template <typename _Up>
5366  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5367  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5368  enable_if_t<
5369  conjunction<
5370  is_same<simd_abi::fixed_size<size()>, abi_type>,
5371  negation<__is_narrowing_conversion<_Up, value_type>>,
5372  __converts_to_higher_integer_rank<_Up, value_type>>::value,
5373  void*> = nullptr)
5374  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5375 
5376  // explicit type conversion constructor
5377 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5378  template <typename _Up, typename _A2,
5379  typename = decltype(static_simd_cast<simd>(
5380  declval<const simd<_Up, _A2>&>()))>
5381  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5382  simd(const simd<_Up, _A2>& __x)
5383  : simd(static_simd_cast<simd>(__x)) {}
5384 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5385 
5386  // generator constructor
5387  template <typename _Fp>
5388  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5389  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5390  declval<_SizeConstant<0>&>())),
5391  value_type>* = nullptr)
5392  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
5393 
5394  // load constructor
5395  template <typename _Up, typename _Flags>
5396  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5397  simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5398  : _M_data(
5399  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5400  {}
5401 
5402  // loads [simd.load]
5403  template <typename _Up, typename _Flags>
5404  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5405  copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5406  {
5407  _M_data = static_cast<decltype(_M_data)>(
5408  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5409  }
5410 
5411  // stores [simd.store]
5412  template <typename _Up, typename _Flags>
5413  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5414  copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5415  {
5416  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5417  _S_type_tag);
5418  }
5419 
5420  // scalar access
5421  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5422  operator[](size_t __i)
5423  { return {_M_data, int(__i)}; }
5424 
5425  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5426  operator[]([[maybe_unused]] size_t __i) const
5427  {
5428  if constexpr (__is_scalar_abi<_Abi>())
5429  {
5430  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5431  return _M_data;
5432  }
5433  else
5434  return _M_data[__i];
5435  }
5436 
5437  // increment and decrement:
5438  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5439  operator++()
5440  {
5441  _Impl::_S_increment(_M_data);
5442  return *this;
5443  }
5444 
5445  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5446  operator++(int)
5447  {
5448  simd __r = *this;
5449  _Impl::_S_increment(_M_data);
5450  return __r;
5451  }
5452 
5453  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5454  operator--()
5455  {
5456  _Impl::_S_decrement(_M_data);
5457  return *this;
5458  }
5459 
5460  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5461  operator--(int)
5462  {
5463  simd __r = *this;
5464  _Impl::_S_decrement(_M_data);
5465  return __r;
5466  }
5467 
5468  // unary operators (for any _Tp)
5469  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5470  operator!() const
5471  { return {__private_init, _Impl::_S_negate(_M_data)}; }
5472 
5473  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5474  operator+() const
5475  { return *this; }
5476 
5477  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5478  operator-() const
5479  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5480 
5481  // access to internal representation (suggested extension)
5482  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5483  simd(_CastType __init) : _M_data(__init) {}
5484 
5485  // compound assignment [simd.cassign]
5486  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5487  operator+=(simd& __lhs, const simd& __x)
5488  { return __lhs = __lhs + __x; }
5489 
5490  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5491  operator-=(simd& __lhs, const simd& __x)
5492  { return __lhs = __lhs - __x; }
5493 
5494  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5495  operator*=(simd& __lhs, const simd& __x)
5496  { return __lhs = __lhs * __x; }
5497 
5498  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5499  operator/=(simd& __lhs, const simd& __x)
5500  { return __lhs = __lhs / __x; }
5501 
5502  // binary operators [simd.binary]
5503  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5504  operator+(const simd& __x, const simd& __y)
5505  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5506 
5507  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5508  operator-(const simd& __x, const simd& __y)
5509  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5510 
5511  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5512  operator*(const simd& __x, const simd& __y)
5513  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5514 
5515  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5516  operator/(const simd& __x, const simd& __y)
5517  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5518 
5519  // compares [simd.comparison]
5520  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5521  operator==(const simd& __x, const simd& __y)
5522  { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5523 
5524  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5525  operator!=(const simd& __x, const simd& __y)
5526  {
5527  return simd::_S_make_mask(
5528  _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5529  }
5530 
5531  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5532  operator<(const simd& __x, const simd& __y)
5533  { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5534 
5535  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5536  operator<=(const simd& __x, const simd& __y)
5537  {
5538  return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5539  }
5540 
5541  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5542  operator>(const simd& __x, const simd& __y)
5543  { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5544 
5545  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5546  operator>=(const simd& __x, const simd& __y)
5547  {
5548  return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5549  }
5550 
5551  // operator?: overloads (suggested extension) {{{
5552 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5553  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5554  operator?:(const mask_type& __k, const simd& __where_true,
5555  const simd& __where_false)
5556  {
5557  auto __ret = __where_false;
5558  _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5559  return __ret;
5560  }
5561 
5562 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5563  // }}}
5564 
5565  // "private" because of the first arguments's namespace
5566  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5567  simd(_PrivateInit, const _MemberType& __init)
5568  : _M_data(__init) {}
5569 
5570  // "private" because of the first arguments's namespace
5571  _GLIBCXX_SIMD_INTRINSIC
5572  simd(_BitsetInit, bitset<size()> __init) : _M_data()
5573  { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5574 
5575  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5576  _M_is_constprop() const
5577  {
5578  if constexpr (__is_scalar_abi<_Abi>())
5579  return __builtin_constant_p(_M_data);
5580  else
5581  return _M_data._M_is_constprop();
5582  }
5583 
5584  private:
5585  _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5586  _S_make_mask(typename mask_type::_MemberType __k)
5587  { return {__private_init, __k}; }
5588 
5589  friend const auto& __data<value_type, abi_type>(const simd&);
5590  friend auto& __data<value_type, abi_type>(simd&);
5591  alignas(_Traits::_S_simd_align) _MemberType _M_data;
5592  };
5593 
5594 // }}}
5595 /// @cond undocumented
5596 // __data {{{
5597 template <typename _Tp, typename _Ap>
5598  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5599  __data(const simd<_Tp, _Ap>& __x)
5600  { return __x._M_data; }
5601 
5602 template <typename _Tp, typename _Ap>
5603  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5604  __data(simd<_Tp, _Ap>& __x)
5605  { return __x._M_data; }
5606 
5607 // }}}
5608 namespace __float_bitwise_operators { //{{{
5609 template <typename _Tp, typename _Ap>
5610  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5611  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5612  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5613 
5614 template <typename _Tp, typename _Ap>
5615  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5616  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5617  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5618 
5619 template <typename _Tp, typename _Ap>
5620  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5621  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5622  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5623 
5624 template <typename _Tp, typename _Ap>
5625  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5626  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5627  operator~(const simd<_Tp, _Ap>& __a)
5628  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5629 } // namespace __float_bitwise_operators }}}
5630 /// @endcond
5631 
5632 /// @}
5633 _GLIBCXX_SIMD_END_NAMESPACE
5634 
5635 #endif // __cplusplus >= 201703L
5636 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5637 
5638 // vim: foldmethod=marker foldmarker={{{,}}}
std::remove_reference_t
typename remove_reference< _Tp >::type remove_reference_t
Alias template for remove_reference.
Definition: type_traits:1664
std::make_index_sequence
make_integer_sequence< size_t, _Num > make_index_sequence
Alias template make_index_sequence.
Definition: utility.h:188
std::declval
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition: type_traits:2387
std::remove_const_t
typename remove_const< _Tp >::type remove_const_t
Alias template for remove_const.
Definition: type_traits:1595
std::conditional_t
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition: type_traits:2612
cstring
std::false_type
integral_constant< bool, false > false_type
The type used as a compile-time boolean with false value.
Definition: type_traits:85
std::operator-
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
Definition: complex:362
std::operator*
constexpr complex< _Tp > operator*(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x times y.
Definition: complex:392
iosfwd
bit
std::max
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:254
std::move
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition: move.h:104
std::min
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:230
std::enable_if_t
typename enable_if< _Cond, _Tp >::type enable_if_t
Alias template for enable_if.
Definition: type_traits:2608
numeric_traits.h
std::operator+
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
Definition: complex:332
std::operator&
bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1438
std::chrono::operator%
constexpr duration< __common_rep_t< _Rep1, __disable_if_is_duration< _Rep2 > >, _Period > operator%(const duration< _Rep1, _Period > &__d, const _Rep2 &__s)
Definition: chrono.h:757
std::make_unsigned_t
typename make_unsigned< _Tp >::type make_unsigned_t
Alias template for make_unsigned.
Definition: type_traits:2003
std::reduce
constexpr _Tp reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
Calculate reduction of values in a range.
Definition: numeric:287
std::popcount
constexpr _If_is_unsigned_integer< _Tp, int > popcount(_Tp __x) noexcept
The number of bits set in x.
Definition: bit:426
std::remove_pointer_t
typename remove_pointer< _Tp >::type remove_pointer_t
Alias template for remove_pointer.
Definition: type_traits:2078
std::make_tuple
constexpr tuple< typename __decay_and_strip< _Elements >::__type... > make_tuple(_Elements &&... __args)
Create a tuple containing copies of the arguments.
Definition: tuple:1592
std::void_t
void void_t
A metafunction that always yields void, used for detecting valid types.
Definition: type_traits:2630
bitset
std::true_type
integral_constant< bool, true > true_type
The type used as a compile-time boolean with true value.
Definition: type_traits:82
std::operator>>
std::basic_istream< _CharT, _Traits > & operator>>(std::basic_istream< _CharT, _Traits > &__is, bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition: bitset:1475
std::operator/
constexpr complex< _Tp > operator/(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x divided by y.
Definition: complex:422
functional
std::tuple_cat
constexpr auto tuple_cat(_Tpls &&... __tpls) -> typename __tuple_cat_result< _Tpls... >::__type
Create a tuple containing all elements from multiple tuple-like objects.
Definition: tuple:1746
simd_abi::deduce
Definition: simd.h:141
std::operator|
bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1447
utility
cmath
std::operator^
bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1456
std::operator<<
std::basic_ostream< _CharT, _Traits > & operator<<(std::basic_ostream< _CharT, _Traits > &__os, const bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition: bitset:1543