Grok 10.0.5
scalar-inl.h
Go to the documentation of this file.
1// Copyright 2019 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Single-element vectors and operations.
17// External include guard in highway.h - see comment there.
18
19#include <stddef.h>
20#include <stdint.h>
21
22#include "hwy/base.h"
23#include "hwy/ops/shared-inl.h"
24
26namespace hwy {
27namespace HWY_NAMESPACE {
28
29// Single instruction, single data.
30template <typename T>
32
33// (Wrapper class required for overloading comparison operators.)
34template <typename T>
35struct Vec1 {
36 using PrivateT = T; // only for DFromV
37 static constexpr size_t kPrivateN = 1; // only for DFromV
38
39 HWY_INLINE Vec1() = default;
40 Vec1(const Vec1&) = default;
41 Vec1& operator=(const Vec1&) = default;
42 HWY_INLINE explicit Vec1(const T t) : raw(t) {}
43
45 return *this = (*this * other);
46 }
48 return *this = (*this / other);
49 }
51 return *this = (*this + other);
52 }
54 return *this = (*this - other);
55 }
57 return *this = (*this & other);
58 }
60 return *this = (*this | other);
61 }
63 return *this = (*this ^ other);
64 }
65
66 T raw;
67};
68
69// 0 or FF..FF, same size as Vec1.
70template <typename T>
71class Mask1 {
73
74 public:
75 static HWY_INLINE Mask1<T> FromBool(bool b) {
76 Mask1<T> mask;
77 mask.bits = b ? static_cast<Raw>(~Raw{0}) : 0;
78 return mask;
79 }
80
82};
83
84template <class V>
86
87template <class V>
88using TFromV = typename V::PrivateT;
89
90// ------------------------------ BitCast
91
92template <typename T, typename FromT>
94 static_assert(sizeof(T) <= sizeof(FromT), "Promoting is undefined");
95 T to;
96 CopyBytes<sizeof(FromT)>(&v.raw, &to); // not same size - ok to shrink
97 return Vec1<T>(to);
98}
99
100// ------------------------------ Set
101
102template <typename T>
104 return Vec1<T>(T(0));
105}
106
107template <typename T, typename T2>
108HWY_API Vec1<T> Set(Sisd<T> /* tag */, const T2 t) {
109 return Vec1<T>(static_cast<T>(t));
110}
111
112template <typename T>
114 return Zero(d);
115}
116
117template <typename T, typename T2>
118HWY_API Vec1<T> Iota(const Sisd<T> /* tag */, const T2 first) {
119 return Vec1<T>(static_cast<T>(first));
120}
121
// Vector type corresponding to descriptor D (here always Vec1<T>).
template <class D>
using VFromD = decltype(Zero(D()));
124
125// ================================================== LOGICAL
126
127// ------------------------------ Not
128
129template <typename T>
131 using TU = MakeUnsigned<T>;
132 const Sisd<TU> du;
133 return BitCast(Sisd<T>(), Vec1<TU>(static_cast<TU>(~BitCast(du, v).raw)));
134}
135
136// ------------------------------ And
137
138template <typename T>
140 using TU = MakeUnsigned<T>;
141 const Sisd<TU> du;
142 return BitCast(Sisd<T>(), Vec1<TU>(BitCast(du, a).raw & BitCast(du, b).raw));
143}
144template <typename T>
146 return And(a, b);
147}
148
149// ------------------------------ AndNot
150
151template <typename T>
153 using TU = MakeUnsigned<T>;
154 const Sisd<TU> du;
155 return BitCast(Sisd<T>(), Vec1<TU>(static_cast<TU>(~BitCast(du, a).raw &
156 BitCast(du, b).raw)));
157}
158
159// ------------------------------ Or
160
161template <typename T>
163 using TU = MakeUnsigned<T>;
164 const Sisd<TU> du;
165 return BitCast(Sisd<T>(), Vec1<TU>(BitCast(du, a).raw | BitCast(du, b).raw));
166}
167template <typename T>
169 return Or(a, b);
170}
171
172// ------------------------------ Xor
173
174template <typename T>
176 using TU = MakeUnsigned<T>;
177 const Sisd<TU> du;
178 return BitCast(Sisd<T>(), Vec1<TU>(BitCast(du, a).raw ^ BitCast(du, b).raw));
179}
180template <typename T>
182 return Xor(a, b);
183}
184
185// ------------------------------ Xor3
186
187template <typename T>
189 return Xor(x1, Xor(x2, x3));
190}
191
192// ------------------------------ Or3
193
194template <typename T>
196 return Or(o1, Or(o2, o3));
197}
198
199// ------------------------------ OrAnd
200
201template <typename T>
202HWY_API Vec1<T> OrAnd(const Vec1<T> o, const Vec1<T> a1, const Vec1<T> a2) {
203 return Or(o, And(a1, a2));
204}
205
206// ------------------------------ IfVecThenElse
207
208template <typename T>
210 return IfThenElse(MaskFromVec(mask), yes, no);
211}
212
213// ------------------------------ CopySign
214
215template <typename T>
216HWY_API Vec1<T> CopySign(const Vec1<T> magn, const Vec1<T> sign) {
217 static_assert(IsFloat<T>(), "Only makes sense for floating-point");
218 const auto msb = SignBit(Sisd<T>());
219 return Or(AndNot(msb, magn), And(msb, sign));
220}
221
222template <typename T>
224 static_assert(IsFloat<T>(), "Only makes sense for floating-point");
225 return Or(abs, And(SignBit(Sisd<T>()), sign));
226}
227
228// ------------------------------ BroadcastSignBit
229
230template <typename T>
232 // This is used inside ShiftRight, so we cannot implement in terms of it.
233 return v.raw < 0 ? Vec1<T>(T(-1)) : Vec1<T>(0);
234}
235
236// ------------------------------ PopulationCount
237
238#ifdef HWY_NATIVE_POPCNT
239#undef HWY_NATIVE_POPCNT
240#else
241#define HWY_NATIVE_POPCNT
242#endif
243
244template <typename T>
246 return Vec1<T>(static_cast<T>(PopCount(v.raw)));
247}
248
249// ------------------------------ Mask
250
251template <typename TFrom, typename TTo>
253 static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
254 return Mask1<TTo>{m.bits};
255}
256
257// v must be 0 or FF..FF.
258template <typename T>
260 Mask1<T> mask;
261 CopySameSize(&v, &mask);
262 return mask;
263}
264
265template <typename T>
267 Vec1<T> v;
268 CopySameSize(&mask, &v);
269 return v;
270}
271
272template <typename T>
273Vec1<T> VecFromMask(Sisd<T> /* tag */, const Mask1<T> mask) {
274 Vec1<T> v;
275 CopySameSize(&mask, &v);
276 return v;
277}
278
// Mask selecting the first n lanes; with a single lane this is just n != 0.
template <typename T>
HWY_API Mask1<T> FirstN(Sisd<T> /*tag*/, size_t n) {
  return Mask1<T>::FromBool(n != 0);
}
283
284// Returns mask ? yes : no.
285template <typename T>
287 const Vec1<T> no) {
288 return mask.bits ? yes : no;
289}
290
291template <typename T>
293 return mask.bits ? yes : Vec1<T>(0);
294}
295
296template <typename T>
298 return mask.bits ? Vec1<T>(0) : no;
299}
300
301template <typename T>
303 return v.raw < 0 ? yes : no;
304}
305
306template <typename T>
308 return v.raw < 0 ? Vec1<T>(0) : v;
309}
310
311// ------------------------------ Mask logical
312
313template <typename T>
315 return MaskFromVec(Not(VecFromMask(Sisd<T>(), m)));
316}
317
318template <typename T>
320 const Sisd<T> d;
321 return MaskFromVec(And(VecFromMask(d, a), VecFromMask(d, b)));
322}
323
324template <typename T>
326 const Sisd<T> d;
327 return MaskFromVec(AndNot(VecFromMask(d, a), VecFromMask(d, b)));
328}
329
330template <typename T>
332 const Sisd<T> d;
333 return MaskFromVec(Or(VecFromMask(d, a), VecFromMask(d, b)));
334}
335
336template <typename T>
338 const Sisd<T> d;
339 return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b)));
340}
341
342template <typename T>
347
348// ================================================== SHIFTS
349
350// ------------------------------ ShiftLeft/ShiftRight (BroadcastSignBit)
351
352template <int kBits, typename T>
354 static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
355 return Vec1<T>(
356 static_cast<T>(static_cast<hwy::MakeUnsigned<T>>(v.raw) << kBits));
357}
358
359template <int kBits, typename T>
361 static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
362#if __cplusplus >= 202002L
363 // Signed right shift is now guaranteed to be arithmetic (rounding toward
364 // negative infinity, i.e. shifting in the sign bit).
365 return Vec1<T>(static_cast<T>(v.raw >> kBits));
366#else
367 if (IsSigned<T>()) {
368 // Emulate arithmetic shift using only logical (unsigned) shifts, because
369 // signed shifts are still implementation-defined.
370 using TU = hwy::MakeUnsigned<T>;
371 const Sisd<TU> du;
372 const TU shifted = static_cast<TU>(BitCast(du, v).raw >> kBits);
373 const TU sign = BitCast(du, BroadcastSignBit(v)).raw;
374 const size_t sign_shift =
375 static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - kBits);
376 const TU upper = static_cast<TU>(sign << sign_shift);
377 return BitCast(Sisd<T>(), Vec1<TU>(shifted | upper));
378 } else { // T is unsigned
379 return Vec1<T>(static_cast<T>(v.raw >> kBits));
380 }
381#endif
382}
383
384// ------------------------------ RotateRight (ShiftRight)
385
386namespace detail {
387
388// For partial specialization: kBits == 0 results in an invalid shift count
389template <int kBits>
391 template <typename T>
393 return Or(ShiftRight<kBits>(v), ShiftLeft<sizeof(T) * 8 - kBits>(v));
394 }
395};
396
397template <>
398struct RotateRight<0> {
399 template <typename T>
401 return v;
402 }
403};
404
405} // namespace detail
406
407template <int kBits, typename T>
409 static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
411}
412
413// ------------------------------ ShiftLeftSame (BroadcastSignBit)
414
415template <typename T>
417 return Vec1<T>(
418 static_cast<T>(static_cast<hwy::MakeUnsigned<T>>(v.raw) << bits));
419}
420
421template <typename T>
423#if __cplusplus >= 202002L
424 // Signed right shift is now guaranteed to be arithmetic (rounding toward
425 // negative infinity, i.e. shifting in the sign bit).
426 return Vec1<T>(static_cast<T>(v.raw >> bits));
427#else
428 if (IsSigned<T>()) {
429 // Emulate arithmetic shift using only logical (unsigned) shifts, because
430 // signed shifts are still implementation-defined.
431 using TU = hwy::MakeUnsigned<T>;
432 const Sisd<TU> du;
433 const TU shifted = static_cast<TU>(BitCast(du, v).raw >> bits);
434 const TU sign = BitCast(du, BroadcastSignBit(v)).raw;
435 const size_t sign_shift =
436 static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - bits);
437 const TU upper = static_cast<TU>(sign << sign_shift);
438 return BitCast(Sisd<T>(), Vec1<TU>(shifted | upper));
439 } else { // T is unsigned
440 return Vec1<T>(static_cast<T>(v.raw >> bits));
441 }
442#endif
443}
444
445// ------------------------------ Shl
446
447// Single-lane => same as ShiftLeftSame except for the argument type.
448template <typename T>
450 return ShiftLeftSame(v, static_cast<int>(bits.raw));
451}
452
453template <typename T>
455 return ShiftRightSame(v, static_cast<int>(bits.raw));
456}
457
458// ================================================== ARITHMETIC
459
460template <typename T>
462 const uint64_t a64 = static_cast<uint64_t>(a.raw);
463 const uint64_t b64 = static_cast<uint64_t>(b.raw);
464 return Vec1<T>(static_cast<T>((a64 + b64) & static_cast<uint64_t>(~T(0))));
465}
467 return Vec1<float>(a.raw + b.raw);
468}
470 return Vec1<double>(a.raw + b.raw);
471}
472
473template <typename T>
475 const uint64_t a64 = static_cast<uint64_t>(a.raw);
476 const uint64_t b64 = static_cast<uint64_t>(b.raw);
477 return Vec1<T>(static_cast<T>((a64 - b64) & static_cast<uint64_t>(~T(0))));
478}
480 return Vec1<float>(a.raw - b.raw);
481}
483 return Vec1<double>(a.raw - b.raw);
484}
485
486// ------------------------------ SumsOf8
487
491
492// ------------------------------ SaturatedAdd
493
494// Returns a + b clamped to the destination range.
495
496// Unsigned
498 const Vec1<uint8_t> b) {
499 return Vec1<uint8_t>(
500 static_cast<uint8_t>(HWY_MIN(HWY_MAX(0, a.raw + b.raw), 255)));
501}
503 const Vec1<uint16_t> b) {
504 return Vec1<uint16_t>(
505 static_cast<uint16_t>(HWY_MIN(HWY_MAX(0, a.raw + b.raw), 65535)));
506}
507
508// Signed
510 return Vec1<int8_t>(
511 static_cast<int8_t>(HWY_MIN(HWY_MAX(-128, a.raw + b.raw), 127)));
512}
514 const Vec1<int16_t> b) {
515 return Vec1<int16_t>(
516 static_cast<int16_t>(HWY_MIN(HWY_MAX(-32768, a.raw + b.raw), 32767)));
517}
518
519// ------------------------------ Saturating subtraction
520
521// Returns a - b clamped to the destination range.
522
523// Unsigned
525 const Vec1<uint8_t> b) {
526 return Vec1<uint8_t>(
527 static_cast<uint8_t>(HWY_MIN(HWY_MAX(0, a.raw - b.raw), 255)));
528}
530 const Vec1<uint16_t> b) {
531 return Vec1<uint16_t>(
532 static_cast<uint16_t>(HWY_MIN(HWY_MAX(0, a.raw - b.raw), 65535)));
533}
534
535// Signed
537 return Vec1<int8_t>(
538 static_cast<int8_t>(HWY_MIN(HWY_MAX(-128, a.raw - b.raw), 127)));
539}
541 const Vec1<int16_t> b) {
542 return Vec1<int16_t>(
543 static_cast<int16_t>(HWY_MIN(HWY_MAX(-32768, a.raw - b.raw), 32767)));
544}
545
546// ------------------------------ Average
547
548// Returns (a + b + 1) / 2
549
551 const Vec1<uint8_t> b) {
552 return Vec1<uint8_t>(static_cast<uint8_t>((a.raw + b.raw + 1) / 2));
553}
555 const Vec1<uint16_t> b) {
556 return Vec1<uint16_t>(static_cast<uint16_t>((a.raw + b.raw + 1) / 2));
557}
558
559// ------------------------------ Absolute value
560
561template <typename T>
563 const T i = a.raw;
564 return (i >= 0 || i == hwy::LimitsMin<T>()) ? a : Vec1<T>(static_cast<T>(-i));
565}
567 int32_t i;
568 CopyBytes<sizeof(i)>(&a.raw, &i);
569 i &= 0x7FFFFFFF;
570 CopyBytes<sizeof(i)>(&i, &a.raw);
571 return a;
572}
574 int64_t i;
575 CopyBytes<sizeof(i)>(&a.raw, &i);
576 i &= 0x7FFFFFFFFFFFFFFFL;
577 CopyBytes<sizeof(i)>(&i, &a.raw);
578 return a;
579}
580
581// ------------------------------ Min/Max
582
583// <cmath> may be unavailable, so implement our own.
584namespace detail {
585
586static inline float Abs(float f) {
587 uint32_t i;
588 CopyBytes<4>(&f, &i);
589 i &= 0x7FFFFFFFu;
590 CopyBytes<4>(&i, &f);
591 return f;
592}
593static inline double Abs(double f) {
594 uint64_t i;
595 CopyBytes<8>(&f, &i);
596 i &= 0x7FFFFFFFFFFFFFFFull;
597 CopyBytes<8>(&i, &f);
598 return f;
599}
600
601static inline bool SignBit(float f) {
602 uint32_t i;
603 CopyBytes<4>(&f, &i);
604 return (i >> 31) != 0;
605}
606static inline bool SignBit(double f) {
607 uint64_t i;
608 CopyBytes<8>(&f, &i);
609 return (i >> 63) != 0;
610}
611
612} // namespace detail
613
614template <typename T, HWY_IF_NOT_FLOAT(T)>
616 return Vec1<T>(HWY_MIN(a.raw, b.raw));
617}
618
619template <typename T, HWY_IF_FLOAT(T)>
620HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
621 if (isnan(a.raw)) return b;
622 if (isnan(b.raw)) return a;
623 return Vec1<T>(HWY_MIN(a.raw, b.raw));
624}
625
626template <typename T, HWY_IF_NOT_FLOAT(T)>
628 return Vec1<T>(HWY_MAX(a.raw, b.raw));
629}
630
631template <typename T, HWY_IF_FLOAT(T)>
632HWY_API Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
633 if (isnan(a.raw)) return b;
634 if (isnan(b.raw)) return a;
635 return Vec1<T>(HWY_MAX(a.raw, b.raw));
636}
637
638// ------------------------------ Floating-point negate
639
640template <typename T, HWY_IF_FLOAT(T)>
642 return Xor(v, SignBit(Sisd<T>()));
643}
644
645template <typename T, HWY_IF_NOT_FLOAT(T)>
646HWY_API Vec1<T> Neg(const Vec1<T> v) {
647 return Zero(Sisd<T>()) - v;
648}
649
650// ------------------------------ mul/div
651
652template <typename T, HWY_IF_FLOAT(T)>
654 return Vec1<T>(static_cast<T>(double{a.raw} * b.raw));
655}
656
657template <typename T, HWY_IF_SIGNED(T)>
658HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
659 return Vec1<T>(static_cast<T>(static_cast<uint64_t>(a.raw) *
660 static_cast<uint64_t>(b.raw)));
661}
662
663template <typename T, HWY_IF_UNSIGNED(T)>
664HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
665 return Vec1<T>(static_cast<T>(static_cast<uint64_t>(a.raw) *
666 static_cast<uint64_t>(b.raw)));
667}
668
669template <typename T>
671 return Vec1<T>(a.raw / b.raw);
672}
673
674// Returns the upper 16 bits of a * b in each lane.
676 return Vec1<int16_t>(static_cast<int16_t>((a.raw * b.raw) >> 16));
677}
679 // Cast to uint32_t first to prevent overflow. Otherwise the result of
680 // uint16_t * uint16_t is in "int" which may overflow. In practice the result
681 // is the same but this way it is also defined.
682 return Vec1<uint16_t>(static_cast<uint16_t>(
683 (static_cast<uint32_t>(a.raw) * static_cast<uint32_t>(b.raw)) >> 16));
684}
685
687 return Vec1<int16_t>(static_cast<int16_t>((2 * a.raw * b.raw + 32768) >> 16));
688}
689
690// Multiplies even lanes (0, 2 ..) and returns the double-wide result.
692 const int64_t a64 = a.raw;
693 return Vec1<int64_t>(a64 * b.raw);
694}
696 const uint64_t a64 = a.raw;
697 return Vec1<uint64_t>(a64 * b.raw);
698}
699
700// Approximate reciprocal
702 // Zero inputs are allowed, but callers are responsible for replacing the
703 // return value with something else (typically using IfThenElse). This check
704 // avoids a ubsan error. The return value is arbitrary.
705 if (v.raw == 0.0f) return Vec1<float>(0.0f);
706 return Vec1<float>(1.0f / v.raw);
707}
708
709// Absolute value of difference.
711 return Abs(a - b);
712}
713
714// ------------------------------ Floating-point multiply-add variants
715
716template <typename T>
717HWY_API Vec1<T> MulAdd(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> add) {
718 return mul * x + add;
719}
720
721template <typename T>
723 const Vec1<T> add) {
724 return add - mul * x;
725}
726
727template <typename T>
728HWY_API Vec1<T> MulSub(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> sub) {
729 return mul * x - sub;
730}
731
732template <typename T>
734 const Vec1<T> sub) {
735 return Neg(mul) * x - sub;
736}
737
738// ------------------------------ Floating-point square root
739
740// Approximate reciprocal square root
742 float f = v.raw;
743 const float half = f * 0.5f;
744 uint32_t bits;
745 CopySameSize(&f, &bits);
746 // Initial guess based on log2(f)
747 bits = 0x5F3759DF - (bits >> 1);
748 CopySameSize(&bits, &f);
749 // One Newton-Raphson iteration
750 return Vec1<float>(f * (1.5f - (half * f * f)));
751}
752
753// Square root
755#if HWY_COMPILER_GCC && defined(HWY_NO_LIBCXX)
756 return Vec1<float>(__builtin_sqrt(v.raw));
757#else
758 return Vec1<float>(sqrtf(v.raw));
759#endif
760}
762#if HWY_COMPILER_GCC && defined(HWY_NO_LIBCXX)
763 return Vec1<float>(__builtin_sqrt(v.raw));
764#else
765 return Vec1<double>(sqrt(v.raw));
766#endif
767}
768
769// ------------------------------ Floating-point rounding
770
771template <typename T>
773 using TI = MakeSigned<T>;
774 if (!(Abs(v).raw < MantissaEnd<T>())) { // Huge or NaN
775 return v;
776 }
777 const T bias = v.raw < T(0.0) ? T(-0.5) : T(0.5);
778 const TI rounded = static_cast<TI>(v.raw + bias);
779 if (rounded == 0) return CopySignToAbs(Vec1<T>(0), v);
780 // Round to even
781 if ((rounded & 1) && detail::Abs(static_cast<T>(rounded) - v.raw) == T(0.5)) {
782 return Vec1<T>(static_cast<T>(rounded - (v.raw < T(0) ? -1 : 1)));
783 }
784 return Vec1<T>(static_cast<T>(rounded));
785}
786
787// Round-to-nearest even.
789 using T = float;
790 using TI = int32_t;
791
792 const T abs = Abs(v).raw;
793 const bool is_sign = detail::SignBit(v.raw);
794
795 if (!(abs < MantissaEnd<T>())) { // Huge or NaN
796 // Check if too large to cast or NaN
797 if (!(abs <= static_cast<T>(LimitsMax<TI>()))) {
798 return Vec1<TI>(is_sign ? LimitsMin<TI>() : LimitsMax<TI>());
799 }
800 return Vec1<int32_t>(static_cast<TI>(v.raw));
801 }
802 const T bias = v.raw < T(0.0) ? T(-0.5) : T(0.5);
803 const TI rounded = static_cast<TI>(v.raw + bias);
804 if (rounded == 0) return Vec1<int32_t>(0);
805 // Round to even
806 if ((rounded & 1) && detail::Abs(static_cast<T>(rounded) - v.raw) == T(0.5)) {
807 return Vec1<TI>(rounded - (is_sign ? -1 : 1));
808 }
809 return Vec1<TI>(rounded);
810}
811
812template <typename T>
814 using TI = MakeSigned<T>;
815 if (!(Abs(v).raw <= MantissaEnd<T>())) { // Huge or NaN
816 return v;
817 }
818 const TI truncated = static_cast<TI>(v.raw);
819 if (truncated == 0) return CopySignToAbs(Vec1<T>(0), v);
820 return Vec1<T>(static_cast<T>(truncated));
821}
822
// Generic ceiling via direct manipulation of the IEEE-754 bit pattern.
// Float: lane type; Bits: unsigned integer of the same size;
// kMantissaBits/kExponentBits: field widths of the format; V: vector type.
template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
          class V>
V Ceiling(const V v) {
  const Bits kExponentMask = (1ull << kExponentBits) - 1;
  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
  const Bits kBias = kExponentMask / 2;  // IEEE-754 exponent bias

  Float f = v.raw;
  const bool positive = f > Float(0.0);

  Bits bits;
  CopySameSize(&v, &bits);

  // Unbiased exponent; determines how many mantissa bits are fractional.
  const int exponent =
      static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
  // Already an integer.
  if (exponent >= kMantissaBits) return v;
  // |v| <= 1 => 0 or 1. Negative inputs yield -0 to preserve the sign.
  if (exponent < 0) return positive ? V(1) : V(-0.0);

  // Mask covering the mantissa bits that hold the fractional part.
  const Bits mantissa_mask = kMantissaMask >> exponent;
  // Already an integer
  if ((bits & mantissa_mask) == 0) return v;

  // Clear fractional bits and round up: for positive inputs, first add one
  // unit in the last integral mantissa position so truncation rounds toward
  // +infinity.
  if (positive) bits += (kMantissaMask + 1) >> exponent;
  bits &= ~mantissa_mask;

  CopySameSize(&bits, &f);
  return V(f);
}
854
// Generic floor via direct manipulation of the IEEE-754 bit pattern;
// mirror image of Ceiling above (rounds toward -infinity).
template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
          class V>
V Floor(const V v) {
  const Bits kExponentMask = (1ull << kExponentBits) - 1;
  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
  const Bits kBias = kExponentMask / 2;  // IEEE-754 exponent bias

  Float f = v.raw;
  const bool negative = f < Float(0.0);

  Bits bits;
  CopySameSize(&v, &bits);

  // Unbiased exponent; determines how many mantissa bits are fractional.
  const int exponent =
      static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
  // Already an integer.
  if (exponent >= kMantissaBits) return v;
  // |v| <= 1 => -1 or 0.
  if (exponent < 0) return V(negative ? Float(-1.0) : Float(0.0));

  // Mask covering the mantissa bits that hold the fractional part.
  const Bits mantissa_mask = kMantissaMask >> exponent;
  // Already an integer
  if ((bits & mantissa_mask) == 0) return v;

  // Clear fractional bits and round down: for negative inputs (stored as
  // sign + magnitude), incrementing the magnitude moves toward -infinity.
  if (negative) bits += (kMantissaMask + 1) >> exponent;
  bits &= ~mantissa_mask;

  CopySameSize(&bits, &f);
  return V(f);
}
886
887// Toward +infinity, aka ceiling
894
895// Toward -infinity, aka floor
902
903// ================================================== COMPARE
904
905template <typename T>
907 return Mask1<T>::FromBool(a.raw == b.raw);
908}
909
910template <typename T>
912 return Mask1<T>::FromBool(a.raw != b.raw);
913}
914
915template <typename T>
917 static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
918 return (v & bit) == bit;
919}
920
921template <typename T>
923 return Mask1<T>::FromBool(a.raw < b.raw);
924}
925template <typename T>
927 return Mask1<T>::FromBool(a.raw > b.raw);
928}
929
930template <typename T>
932 return Mask1<T>::FromBool(a.raw <= b.raw);
933}
934template <typename T>
936 return Mask1<T>::FromBool(a.raw >= b.raw);
937}
938
939// ------------------------------ Floating-point classification (==)
940
941template <typename T>
943 // std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
945 CopySameSize(&v, &bits);
946 bits += bits;
947 bits >>= 1; // clear sign bit
948 // NaN if all exponent bits are set and the mantissa is not zero.
950}
951
953 const Sisd<float> d;
954 const RebindToUnsigned<decltype(d)> du;
955 const Vec1<uint32_t> vu = BitCast(du, v);
956 // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
957 return RebindMask(d, (vu + vu) == Set(du, 0xFF000000u));
958}
960 const Sisd<double> d;
961 const RebindToUnsigned<decltype(d)> du;
962 const Vec1<uint64_t> vu = BitCast(du, v);
963 // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
964 return RebindMask(d, (vu + vu) == Set(du, 0xFFE0000000000000ull));
965}
966
968 const Vec1<uint32_t> vu = BitCast(Sisd<uint32_t>(), v);
969 // Shift left to clear the sign bit, check whether exponent != max value.
970 return Mask1<float>::FromBool((vu.raw << 1) < 0xFF000000u);
971}
973 const Vec1<uint64_t> vu = BitCast(Sisd<uint64_t>(), v);
974 // Shift left to clear the sign bit, check whether exponent != max value.
975 return Mask1<double>::FromBool((vu.raw << 1) < 0xFFE0000000000000ull);
976}
977
978// ================================================== MEMORY
979
980// ------------------------------ Load
981
982template <typename T>
983HWY_API Vec1<T> Load(Sisd<T> /* tag */, const T* HWY_RESTRICT aligned) {
984 T t;
985 CopySameSize(aligned, &t);
986 return Vec1<T>(t);
987}
988
989template <typename T>
991 const T* HWY_RESTRICT aligned) {
992 return IfThenElseZero(m, Load(d, aligned));
993}
994
995template <typename T>
997 return Load(d, p);
998}
999
1000// In some use cases, "load single lane" is sufficient; otherwise avoid this.
1001template <typename T>
1003 return Load(d, aligned);
1004}
1005
1006// ------------------------------ Store
1007
// Stores the single lane via byte copy, which avoids strict-aliasing issues.
template <typename T>
HWY_API void Store(const Vec1<T> v, Sisd<T> /* tag */,
                   T* HWY_RESTRICT aligned) {
  CopySameSize(&v.raw, aligned);
}
1013
1014template <typename T>
1016 return Store(v, d, p);
1017}
1018
1019template <typename T>
1021 T* HWY_RESTRICT p) {
1022 if (!m.bits) return;
1023 StoreU(v, d, p);
1024}
1025
1026// ------------------------------ LoadInterleaved2/3/4
1027
1028// Per-target flag to prevent generic_ops-inl.h from defining StoreInterleaved2.
1029#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
1030#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
1031#else
1032#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
1033#endif
1034
1035template <typename T>
1037 Vec1<T>& v0, Vec1<T>& v1) {
1038 v0 = LoadU(d, unaligned + 0);
1039 v1 = LoadU(d, unaligned + 1);
1040}
1041
1042template <typename T>
1044 Vec1<T>& v0, Vec1<T>& v1, Vec1<T>& v2) {
1045 v0 = LoadU(d, unaligned + 0);
1046 v1 = LoadU(d, unaligned + 1);
1047 v2 = LoadU(d, unaligned + 2);
1048}
1049
1050template <typename T>
1052 Vec1<T>& v0, Vec1<T>& v1, Vec1<T>& v2,
1053 Vec1<T>& v3) {
1054 v0 = LoadU(d, unaligned + 0);
1055 v1 = LoadU(d, unaligned + 1);
1056 v2 = LoadU(d, unaligned + 2);
1057 v3 = LoadU(d, unaligned + 3);
1058}
1059
1060// ------------------------------ StoreInterleaved2/3/4
1061
1062template <typename T>
1064 T* HWY_RESTRICT unaligned) {
1065 StoreU(v0, d, unaligned + 0);
1066 StoreU(v1, d, unaligned + 1);
1067}
1068
1069template <typename T>
1071 const Vec1<T> v2, Sisd<T> d,
1072 T* HWY_RESTRICT unaligned) {
1073 StoreU(v0, d, unaligned + 0);
1074 StoreU(v1, d, unaligned + 1);
1075 StoreU(v2, d, unaligned + 2);
1076}
1077
1078template <typename T>
1080 const Vec1<T> v2, const Vec1<T> v3, Sisd<T> d,
1081 T* HWY_RESTRICT unaligned) {
1082 StoreU(v0, d, unaligned + 0);
1083 StoreU(v1, d, unaligned + 1);
1084 StoreU(v2, d, unaligned + 2);
1085 StoreU(v3, d, unaligned + 3);
1086}
1087
1088// ------------------------------ Stream
1089
1090template <typename T>
1091HWY_API void Stream(const Vec1<T> v, Sisd<T> d, T* HWY_RESTRICT aligned) {
1092 return Store(v, d, aligned);
1093}
1094
1095// ------------------------------ Scatter
1096
1097template <typename T, typename Offset>
1099 const Vec1<Offset> offset) {
1100 static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
1101 uint8_t* const base8 = reinterpret_cast<uint8_t*>(base) + offset.raw;
1102 return Store(v, d, reinterpret_cast<T*>(base8));
1103}
1104
1105template <typename T, typename Index>
1107 const Vec1<Index> index) {
1108 static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
1109 return Store(v, d, base + index.raw);
1110}
1111
1112// ------------------------------ Gather
1113
1114template <typename T, typename Offset>
1116 const Vec1<Offset> offset) {
1117 static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
1118 const intptr_t addr =
1119 reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
1120 return Load(d, reinterpret_cast<const T*>(addr));
1121}
1122
1123template <typename T, typename Index>
1125 const Vec1<Index> index) {
1126 static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
1127 return Load(d, base + index.raw);
1128}
1129
1130// ================================================== CONVERT
1131
1132// ConvertTo and DemoteTo with floating-point input and integer output truncate
1133// (rounding toward zero).
1134
1135template <typename FromT, typename ToT>
1137 static_assert(sizeof(ToT) > sizeof(FromT), "Not promoting");
1138 // For bits Y > X, floatX->floatY and intX->intY are always representable.
1139 return Vec1<ToT>(static_cast<ToT>(from.raw));
1140}
1141
1142// MSVC 19.10 cannot deduce the argument type if HWY_IF_FLOAT(FromT) is here,
1143// so we overload for FromT=double and ToT={float,int32_t}.
1145 // Prevent ubsan errors when converting float to narrower integer/float
1146 if (IsInf(from).bits ||
1147 Abs(from).raw > static_cast<double>(HighestValue<float>())) {
1150 }
1151 return Vec1<float>(static_cast<float>(from.raw));
1152}
1154 // Prevent ubsan errors when converting int32_t to narrower integer/int32_t
1155 if (IsInf(from).bits ||
1156 Abs(from).raw > static_cast<double>(HighestValue<int32_t>())) {
1159 }
1160 return Vec1<int32_t>(static_cast<int32_t>(from.raw));
1161}
1162
1163template <typename FromT, typename ToT>
1165 static_assert(!IsFloat<FromT>(), "FromT=double are handled above");
1166 static_assert(sizeof(ToT) < sizeof(FromT), "Not demoting");
1167
1168 // Int to int: choose closest value in ToT to `from` (avoids UB)
1169 from.raw = HWY_MIN(HWY_MAX(LimitsMin<ToT>(), from.raw), LimitsMax<ToT>());
1170 return Vec1<ToT>(static_cast<ToT>(from.raw));
1171}
1172
1174 uint16_t bits16;
1175 CopySameSize(&v.raw, &bits16);
1176 const uint32_t sign = static_cast<uint32_t>(bits16 >> 15);
1177 const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
1178 const uint32_t mantissa = bits16 & 0x3FF;
1179
1180 // Subnormal or zero
1181 if (biased_exp == 0) {
1182 const float subnormal =
1183 (1.0f / 16384) * (static_cast<float>(mantissa) * (1.0f / 1024));
1184 return Vec1<float>(sign ? -subnormal : subnormal);
1185 }
1186
1187 // Normalized: convert the representation directly (faster than ldexp/tables).
1188 const uint32_t biased_exp32 = biased_exp + (127 - 15);
1189 const uint32_t mantissa32 = mantissa << (23 - 10);
1190 const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
1191 float out;
1192 CopySameSize(&bits32, &out);
1193 return Vec1<float>(out);
1194}
1195
1199
1201 const Vec1<float> v) {
1202 uint32_t bits32;
1203 CopySameSize(&v.raw, &bits32);
1204 const uint32_t sign = bits32 >> 31;
1205 const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
1206 const uint32_t mantissa32 = bits32 & 0x7FFFFF;
1207
1208 const int32_t exp = HWY_MIN(static_cast<int32_t>(biased_exp32) - 127, 15);
1209
1210 // Tiny or zero => zero.
1211 Vec1<float16_t> out;
1212 if (exp < -24) {
1213 const uint16_t zero = 0;
1214 CopySameSize(&zero, &out.raw);
1215 return out;
1216 }
1217
1218 uint32_t biased_exp16, mantissa16;
1219
1220 // exp = [-24, -15] => subnormal
1221 if (exp < -14) {
1222 biased_exp16 = 0;
1223 const uint32_t sub_exp = static_cast<uint32_t>(-14 - exp);
1224 HWY_DASSERT(1 <= sub_exp && sub_exp < 11);
1225 mantissa16 = static_cast<uint32_t>((1u << (10 - sub_exp)) +
1226 (mantissa32 >> (13 + sub_exp)));
1227 } else {
1228 // exp = [-14, 15]
1229 biased_exp16 = static_cast<uint32_t>(exp + 15);
1230 HWY_DASSERT(1 <= biased_exp16 && biased_exp16 < 31);
1231 mantissa16 = mantissa32 >> 13;
1232 }
1233
1234 HWY_DASSERT(mantissa16 < 1024);
1235 const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
1236 HWY_DASSERT(bits16 < 0x10000);
1237 const uint16_t narrowed = static_cast<uint16_t>(bits16); // big-endian safe
1238 CopySameSize(&narrowed, &out.raw);
1239 return out;
1240}
1241
1245
1246template <typename FromT, typename ToT, HWY_IF_FLOAT(FromT)>
1248 static_assert(sizeof(ToT) == sizeof(FromT), "Should have same size");
1249 // float## -> int##: return closest representable value. We cannot exactly
1250 // represent LimitsMax<ToT> in FromT, so use double.
1251 const double f = static_cast<double>(from.raw);
1252 if (IsInf(from).bits ||
1253 Abs(Vec1<double>(f)).raw > static_cast<double>(LimitsMax<ToT>())) {
1255 : LimitsMax<ToT>());
1256 }
1257 return Vec1<ToT>(static_cast<ToT>(from.raw));
1258}
1259
1260template <typename FromT, typename ToT, HWY_IF_NOT_FLOAT(FromT)>
1261HWY_API Vec1<ToT> ConvertTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
1262 static_assert(sizeof(ToT) == sizeof(FromT), "Should have same size");
1263 // int## -> float##: no check needed
1264 return Vec1<ToT>(static_cast<ToT>(from.raw));
1265}
1266
1270
1271// ------------------------------ Truncations
1272
1274 const Vec1<uint64_t> v) {
1275 return Vec1<uint8_t>{static_cast<uint8_t>(v.raw & 0xFF)};
1276}
1277
1279 const Vec1<uint64_t> v) {
1280 return Vec1<uint16_t>{static_cast<uint16_t>(v.raw & 0xFFFF)};
1281}
1282
1284 const Vec1<uint64_t> v) {
1285 return Vec1<uint32_t>{static_cast<uint32_t>(v.raw & 0xFFFFFFFFu)};
1286}
1287
1289 const Vec1<uint32_t> v) {
1290 return Vec1<uint8_t>{static_cast<uint8_t>(v.raw & 0xFF)};
1291}
1292
1294 const Vec1<uint32_t> v) {
1295 return Vec1<uint16_t>{static_cast<uint16_t>(v.raw & 0xFFFF)};
1296}
1297
1299 const Vec1<uint16_t> v) {
1300 return Vec1<uint8_t>{static_cast<uint8_t>(v.raw & 0xFF)};
1301}
1302
1303// ================================================== COMBINE
1304// UpperHalf, ZeroExtendVector, Combine, Concat* are unsupported.
1305
1306template <typename T>
1308 return v;
1309}
1310
1311template <typename T>
1313 return v;
1314}
1315
1316// ================================================== SWIZZLE
1317
1318template <typename T>
1320 return v.raw;
1321}
1322
1323template <typename T>
1324HWY_API T ExtractLane(const Vec1<T> v, size_t i) {
1325 HWY_DASSERT(i == 0);
1326 (void)i;
1327 return v.raw;
1328}
1329
1330template <typename T>
1332 HWY_DASSERT(i == 0);
1333 (void)i;
1334 v.raw = t;
1335 return v;
1336}
1337
1338template <typename T>
1340 return v;
1341}
1342// DupOdd is unsupported.
1343
1344template <typename T>
1346 return even;
1347}
1348
1349template <typename T>
1351 return even;
1352}
1353
1354// ------------------------------ SwapAdjacentBlocks
1355
1356template <typename T>
1360
1361// ------------------------------ TableLookupLanes
1362
1363// Returned by SetTableIndices for use by TableLookupLanes.
1364template <typename T>
1368
1369template <typename T, typename TI>
1371 static_assert(sizeof(T) == sizeof(TI), "Index size must match lane size");
1372 HWY_DASSERT(vec.raw == 0);
1373 return Indices1<T>{vec.raw};
1374}
1375
1376template <typename T, typename TI>
1378 return IndicesFromVec(d, LoadU(Sisd<TI>(), idx));
1379}
1380
1381template <typename T>
1383 return v;
1384}
1385
1386// ------------------------------ ReverseBlocks
1387
1388// Single block: no change
1389template <typename T>
1391 return v;
1392}
1393
1394// ------------------------------ Reverse
1395
1396template <typename T>
1398 return v;
1399}
1400
1401// Must not be called:
1402template <typename T>
1404 return v;
1405}
1406
1407template <typename T>
1409 return v;
1410}
1411
1412template <typename T>
1414 return v;
1415}
1416
1417// ================================================== BLOCKWISE
1418// Shift*Bytes, CombineShiftRightBytes, Interleave*, Shuffle* are unsupported.
1419
1420// ------------------------------ Broadcast/splat any lane
1421
1422template <int kLane, typename T>
1424 static_assert(kLane == 0, "Scalar only has one lane");
1425 return v;
1426}
1427
1428// ------------------------------ TableLookupBytes, TableLookupBytesOr0
1429
1430template <typename T, typename TI>
1432 uint8_t in_bytes[sizeof(T)];
1433 uint8_t idx_bytes[sizeof(T)];
1434 uint8_t out_bytes[sizeof(T)];
1435 CopyBytes<sizeof(T)>(&in, &in_bytes); // copy to bytes
1436 CopyBytes<sizeof(T)>(&indices, &idx_bytes);
1437 for (size_t i = 0; i < sizeof(T); ++i) {
1438 out_bytes[i] = in_bytes[idx_bytes[i]];
1439 }
1440 TI out;
1441 CopyBytes<sizeof(TI)>(&out_bytes, &out);
1442 return Vec1<TI>{out};
1443}
1444
1445template <typename T, typename TI>
1447 uint8_t in_bytes[sizeof(T)];
1448 uint8_t idx_bytes[sizeof(T)];
1449 uint8_t out_bytes[sizeof(T)];
1450 CopyBytes<sizeof(T)>(&in, &in_bytes); // copy to bytes
1451 CopyBytes<sizeof(T)>(&indices, &idx_bytes);
1452 for (size_t i = 0; i < sizeof(T); ++i) {
1453 out_bytes[i] = idx_bytes[i] & 0x80 ? 0 : in_bytes[idx_bytes[i]];
1454 }
1455 TI out;
1456 CopyBytes<sizeof(TI)>(&out_bytes, &out);
1457 return Vec1<TI>{out};
1458}
1459
1460// ------------------------------ ZipLower
1461
1463 return Vec1<uint16_t>(static_cast<uint16_t>((uint32_t{b.raw} << 8) + a.raw));
1464}
1466 const Vec1<uint16_t> b) {
1467 return Vec1<uint32_t>((uint32_t{b.raw} << 16) + a.raw);
1468}
1470 const Vec1<uint32_t> b) {
1471 return Vec1<uint64_t>((uint64_t{b.raw} << 32) + a.raw);
1472}
1474 return Vec1<int16_t>(static_cast<int16_t>((int32_t{b.raw} << 8) + a.raw));
1475}
1477 return Vec1<int32_t>((int32_t{b.raw} << 16) + a.raw);
1478}
1480 return Vec1<int64_t>((int64_t{b.raw} << 32) + a.raw);
1481}
1482
1483template <typename T, typename TW = MakeWide<T>, class VW = Vec1<TW>>
1485 return VW(static_cast<TW>((TW{b.raw} << (sizeof(T) * 8)) + a.raw));
1486}
1487
1488// ================================================== MASK
1489
1490template <typename T>
1491HWY_API bool AllFalse(Sisd<T> /* tag */, const Mask1<T> mask) {
1492 return mask.bits == 0;
1493}
1494
1495template <typename T>
1496HWY_API bool AllTrue(Sisd<T> /* tag */, const Mask1<T> mask) {
1497 return mask.bits != 0;
1498}
1499
1500// `p` points to at least 8 readable bytes, not all of which need be valid.
1501template <typename T>
1503 const uint8_t* HWY_RESTRICT bits) {
1504 return Mask1<T>::FromBool((bits[0] & 1) != 0);
1505}
1506
1507// `p` points to at least 8 writable bytes.
1508template <typename T>
1509HWY_API size_t StoreMaskBits(Sisd<T> d, const Mask1<T> mask, uint8_t* bits) {
1510 *bits = AllTrue(d, mask);
1511 return 1;
1512}
1513
1514template <typename T>
1515HWY_API size_t CountTrue(Sisd<T> /* tag */, const Mask1<T> mask) {
1516 return mask.bits == 0 ? 0 : 1;
1517}
1518
1519template <typename T>
1520HWY_API intptr_t FindFirstTrue(Sisd<T> /* tag */, const Mask1<T> mask) {
1521 return mask.bits == 0 ? -1 : 0;
1522}
1523
1524template <typename T>
1525HWY_API size_t FindKnownFirstTrue(Sisd<T> /* tag */, const Mask1<T> /* m */) {
1526 return 0; // There is only one lane and we know it is true.
1527}
1528
1529// ------------------------------ Compress, CompressBits
1530
template <typename T>
struct CompressIsPartition {
  // Compress on a single lane trivially yields a partition of that lane.
  enum { value = 1 };
};
1535
1536template <typename T>
1538 // A single lane is already partitioned by definition.
1539 return v;
1540}
1541
1542template <typename T>
1544 // A single lane is already partitioned by definition.
1545 return v;
1546}
1547
1548// ------------------------------ CompressStore
1549template <typename T>
1551 T* HWY_RESTRICT unaligned) {
1552 StoreU(Compress(v, mask), d, unaligned);
1553 return CountTrue(d, mask);
1554}
1555
1556// ------------------------------ CompressBlendedStore
1557template <typename T>
1559 T* HWY_RESTRICT unaligned) {
1560 if (!mask.bits) return 0;
1561 StoreU(v, d, unaligned);
1562 return 1;
1563}
1564
1565// ------------------------------ CompressBits
1566template <typename T>
1568 return v;
1569}
1570
1571// ------------------------------ CompressBitsStore
1572template <typename T>
1574 Sisd<T> d, T* HWY_RESTRICT unaligned) {
1575 const Mask1<T> mask = LoadMaskBits(d, bits);
1576 StoreU(Compress(v, mask), d, unaligned);
1577 return CountTrue(d, mask);
1578}
1579
1580// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
1581
1585 const Vec1<float> sum0,
1586 Vec1<float>& /* sum1 */) {
1587 return MulAdd(Vec1<float>(F32FromBF16(a.raw)),
1588 Vec1<float>(F32FromBF16(b.raw)), sum0);
1589}
1590
1593 Vec1<int16_t> b,
1594 const Vec1<int32_t> sum0,
1595 Vec1<int32_t>& /* sum1 */) {
1596 return Vec1<int32_t>(a.raw * b.raw + sum0.raw);
1597}
1598
1599// ------------------------------ RearrangeToOddPlusEven
1600template <typename TW>
1602 Vec1<TW> /* sum1 */) {
1603 return sum0; // invariant already holds
1604}
1605
1606// ================================================== REDUCTIONS
1607
1608// Sum of all lanes, i.e. the only one.
1609template <typename T>
1611 return v;
1612}
1613template <typename T>
1615 return v;
1616}
1617template <typename T>
1619 return v;
1620}
1621
1622// NOLINTNEXTLINE(google-readability-namespace-comments)
1623} // namespace HWY_NAMESPACE
1624} // namespace hwy
size_t offset
Definition BitIO.h:80
uint32_t x
Definition BlockExec.h:38
bool truncated
Definition TileProcessor.h:172
uint8_t * bits
Definition TileProcessor.h:59
#define HWY_MAX(a, b)
Definition base.h:135
#define HWY_RESTRICT
Definition base.h:64
#define HWY_API
Definition base.h:129
#define HWY_MIN(a, b)
Definition base.h:134
#define HWY_INLINE
Definition base.h:70
#define HWY_DASSERT(condition)
Definition base.h:238
Definition scalar-inl.h:71
Raw bits
Definition scalar-inl.h:81
hwy::MakeUnsigned< T > Raw
Definition scalar-inl.h:72
static HWY_INLINE Mask1< T > FromBool(bool b)
Definition scalar-inl.h:75
uint32_t a
only used by MQ decoder
Definition mqc.h:48
HWY_INLINE Vec128< T, N > Abs(SignedTag, Vec128< T, N > a)
Definition emu128-inl.h:633
HWY_INLINE Mask128< T, N > Or(hwy::SizeTag< 1 >, const Mask128< T, N > a, const Mask128< T, N > b)
Definition x86_128-inl.h:889
static bool SignBit(float f)
Definition scalar-inl.h:601
d
Definition rvv-inl.h:1998
HWY_API Vec128< T, N > ShiftLeftSame(const Vec128< T, N > v, int bits)
Definition arm_neon-inl.h:1631
HWY_API Vec128< T, N > AverageRound(Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:619
HWY_API Vec128< T, N > CopySign(const Vec128< T, N > magn, const Vec128< T, N > sign)
Definition arm_neon-inl.h:2190
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition arm_neon-inl.h:4697
HWY_API Mask128< T, N > operator>(Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:2445
HWY_API Vec128< T, N > operator-(Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:576
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition arm_neon-inl.h:2230
HWY_API Mask128< T, N > operator==(const Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:1139
HWY_API VFromD< DW > ZipLower(V a, V b)
Definition arm_neon-inl.h:4272
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition arm_neon-inl.h:5716
HWY_API void LoadInterleaved2(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1)
Definition arm_neon-inl.h:6349
HWY_API void StoreInterleaved4(const Vec128< T, N > v0, const Vec128< T, N > v1, const Vec128< T, N > v2, const Vec128< T, N > v3, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:6584
HWY_API Vec128< int16_t > MulHigh(const Vec128< int16_t > a, const Vec128< int16_t > b)
Definition arm_neon-inl.h:1684
HWY_API Vec128< float, N > Round(const Vec128< float, N > v)
Definition arm_neon-inl.h:3436
HWY_API Mask128< T, N > IsNaN(const Vec128< T, N > v)
Definition arm_neon-inl.h:3506
HWY_API intptr_t FindFirstTrue(const Simd< T, N, 0 > d, const Mask128< T, N > mask)
Definition arm_neon-inl.h:5691
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition arm_neon-inl.h:2456
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition arm_neon-inl.h:5701
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition arm_neon-inl.h:1799
HWY_API void Stream(const Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:2955
HWY_API Vec128< T, N > Xor3(Vec128< T, N > x1, Vec128< T, N > x2, Vec128< T, N > x3)
Definition arm_neon-inl.h:2025
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:1949
HWY_API Vec128< T, N > SumOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition arm_neon-inl.h:5334
HWY_API Vec128< T, N > BroadcastSignBit(const Vec128< T, N > v)
Definition arm_neon-inl.h:2207
HWY_API Vec128< To, 1 > TruncateTo(Simd< To, 1, 0 >, const Vec128< From, 1 > v)
Definition arm_neon-inl.h:4806
HWY_API Vec128< uint64_t, N > Min(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition arm_neon-inl.h:2517
HWY_API Vec128< T, N > PopulationCount(Vec128< T, N > v)
Definition arm_neon-inl.h:2137
HWY_API Vec128< uint64_t, N > Max(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition arm_neon-inl.h:2555
HWY_API Mask128< T, N > MaskFromVec(const Vec128< T, N > v)
Definition arm_neon-inl.h:2217
HWY_API Vec64< int64_t > Neg(const Vec64< int64_t > v)
Definition arm_neon-inl.h:1405
HWY_API Vec128< T, N > SaturatedAdd(Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:597
HWY_API Vec128< T, N > GatherIndex(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition arm_neon-inl.h:5037
HWY_API Vec128< T > Not(const Vec128< T > v)
Definition arm_neon-inl.h:1931
HWY_API Mask128< T, N > IsInf(const Vec128< T, N > v)
Definition arm_neon-inl.h:3511
HWY_API Vec128< T, N/2 > LowerHalf(const Vec128< T, N > v)
Definition arm_neon-inl.h:3540
HWY_API Vec128< T, N > operator&(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2055
HWY_API Vec128< T, N > operator|(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2060
HWY_API Vec128< int64_t > MulEven(Vec128< int32_t > a, Vec128< int32_t > b)
Definition arm_neon-inl.h:4872
HWY_API Vec128< T, 1 > CompressNot(Vec128< T, 1 > v, Mask128< T, 1 >)
Definition arm_neon-inl.h:6198
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:2758
HWY_API Mask128< T, N > operator<(const Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:1163
HWY_API Vec128< float, N > ReorderWidenMulAccumulate(Simd< float, N, 0 > df32, Vec128< bfloat16_t, 2 *N > a, Vec128< bfloat16_t, 2 *N > b, const Vec128< float, N > sum0, Vec128< float, N > &sum1)
Definition arm_neon-inl.h:4288
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2047
HWY_API Vec128< T, N > operator^(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2065
HWY_API void BlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT p)
Definition arm_neon-inl.h:2941
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition arm_neon-inl.h:5671
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition arm_neon-inl.h:2223
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition arm_neon-inl.h:4646
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition arm_neon-inl.h:2253
HWY_API Mask128< uint64_t, N > TestBit(Vec128< uint64_t, N > v, Vec128< uint64_t, N > bit)
Definition arm_neon-inl.h:2477
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition arm_neon-inl.h:2753
HWY_API Vec128< TI > TableLookupBytes(const Vec128< T > bytes, const Vec128< TI > from)
Definition arm_neon-inl.h:4922
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition emu128-inl.h:303
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition arm_neon-inl.h:4019
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:1998
HWY_API Vec128< float, N > Floor(const Vec128< float, N > v)
Definition arm_neon-inl.h:3467
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition arm_neon-inl.h:1853
HWY_API Vec128< T, N > CopySignToAbs(const Vec128< T, N > abs, const Vec128< T, N > sign)
Definition arm_neon-inl.h:2198
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:2772
HWY_API Vec128< float, N > Ceil(const Vec128< float, N > v)
Definition arm_neon-inl.h:3453
HWY_API Indices128< T, N > IndicesFromVec(Simd< T, N, 0 > d, Vec128< TI, N > vec)
Definition arm_neon-inl.h:3973
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition arm_neon-inl.h:4704
HWY_API Vec128< T, N > Reverse2(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition arm_neon-inl.h:4061
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition arm_sve-inl.h:322
HWY_API Vec128< T, N > Reverse8(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition arm_neon-inl.h:4113
HWY_API Vec< D > SignBit(D d)
Definition generic_ops-inl.h:69
HWY_API Vec128< T, N > MaxOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition arm_neon-inl.h:5342
Vec128< T, N > Iota(const Simd< T, N, 0 > d, const T2 first)
Definition arm_neon-inl.h:1049
HWY_API Mask128< T, N > ExclusiveNeither(const Mask128< T, N > a, Mask128< T, N > b)
Definition arm_neon-inl.h:2314
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition ops/shared-inl.h:212
HWY_INLINE Vec128< T, N > CompressBits(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits)
Definition arm_neon-inl.h:6234
HWY_API Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, const uint8_t *HWY_RESTRICT bits)
Definition arm_neon-inl.h:5407
HWY_API Vec128< T, N > ZeroIfNegative(Vec128< T, N > v)
Definition arm_neon-inl.h:2277
HWY_API Vec128< float, N > operator/(const Vec128< float, N > a, const Vec128< float, N > b)
Definition arm_neon-inl.h:1761
HWY_API Vec64< uint16_t > DemoteTo(Full64< uint16_t >, const Vec128< int32_t > v)
Definition arm_neon-inl.h:3145
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:2591
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition arm_neon-inl.h:2040
HWY_API Vec128< T, N > IfNegativeThenElse(Vec128< T, N > v, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2266
HWY_API Vec128< uint8_t > operator<<(const Vec128< uint8_t > v, const Vec128< uint8_t > bits)
Definition arm_neon-inl.h:1462
HWY_API Vec128< uint16_t > operator*(const Vec128< uint16_t > a, const Vec128< uint16_t > b)
Definition arm_neon-inl.h:1642
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition arm_neon-inl.h:997
HWY_API bool AllFalse(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition arm_neon-inl.h:5710
HWY_API T ExtractLane(const Vec128< T, 1 > v, size_t i)
Definition arm_neon-inl.h:1085
HWY_API void ScatterOffset(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition arm_neon-inl.h:4984
HWY_API Vec128< T, N > Undefined(Simd< T, N, 0 >)
Definition arm_neon-inl.h:1040
HWY_API Vec128< T, N > ShiftRight(Vec128< T, N > v)
Definition emu128-inl.h:386
HWY_API Vec128< float, N > RearrangeToOddPlusEven(const Vec128< float, N > sum0, const Vec128< float, N > sum1)
Definition arm_neon-inl.h:4412
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition arm_neon-inl.h:1020
decltype(Zero(D())) VFromD
Definition arm_neon-inl.h:1030
HWY_API Mask128< T, N > operator>=(Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:2449
HWY_API Vec128< T, N > ShiftRightSame(const Vec128< T, N > v, int bits)
Definition arm_neon-inl.h:1635
HWY_API Vec128< T, N > GatherOffset(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition arm_neon-inl.h:5020
HWY_API void LoadInterleaved3(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1, Vec128< T, N > &v2)
Definition arm_neon-inl.h:6387
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition arm_neon-inl.h:2260
V Ceiling(const V v)
Definition scalar-inl.h:825
HWY_API Mask128< T, N > operator!=(const Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:1148
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:1986
typename V::PrivateT TFromV
Definition arm_neon-inl.h:845
HWY_API Vec128< int32_t, N > NearestInt(const Vec128< float, N > v)
Definition arm_neon-inl.h:3497
HWY_API Vec128< float > ApproximateReciprocal(const Vec128< float > v)
Definition arm_neon-inl.h:1734
HWY_API Vec32< uint8_t > U8FromU32(const Vec128< uint32_t > v)
Definition arm_neon-inl.h:3287
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition arm_neon-inl.h:4013
HWY_API TFromV< V > GetLane(const V v)
Definition arm_neon-inl.h:1076
HWY_API void ScatterIndex(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition arm_neon-inl.h:5002
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition arm_neon-inl.h:1832
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec64< uint8_t > v)
Definition arm_neon-inl.h:2965
HWY_API Mask128< T, N > operator<=(const Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:1180
HWY_API Vec128< T, N > Or3(Vec128< T, N > o1, Vec128< T, N > o2, Vec128< T, N > o3)
Definition arm_neon-inl.h:2033
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition arm_neon-inl.h:2765
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:4678
HWY_API Vec128< int16_t > MulFixedPoint15(Vec128< int16_t > a, Vec128< int16_t > b)
Definition arm_neon-inl.h:1720
HWY_API Vec128< float, N > Trunc(const Vec128< float, N > v)
Definition arm_neon-inl.h:3425
HWY_API Vec128< T, N > MinOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition arm_neon-inl.h:5338
HWY_API size_t CompressStore(Vec128< T, N > v, const Mask128< T, N > mask, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:6248
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition arm_neon-inl.h:2146
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition arm_neon-inl.h:3327
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition arm_neon-inl.h:1913
HWY_API size_t CompressBitsStore(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:6273
HWY_API Vec128< uint32_t, N > RotateRight(const Vec128< uint32_t, N > v)
Definition arm_neon-inl.h:1444
HWY_API Mask128< T, N > IsFinite(const Vec128< T, N > v)
Definition arm_neon-inl.h:3521
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition arm_neon-inl.h:1964
HWY_API Vec128< uint64_t > SumsOf8(const Vec128< uint8_t > v)
Definition arm_neon-inl.h:1361
HWY_API Vec128< float > ApproximateReciprocalSqrt(const Vec128< float > v)
Definition arm_neon-inl.h:1885
HWY_API void LoadInterleaved4(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1, Vec128< T, N > &v2, Vec128< T, N > &v3)
Definition arm_neon-inl.h:6428
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition arm_neon-inl.h:4712
HWY_API size_t CompressBlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:6257
HWY_API Vec128< T, N > Reverse4(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition arm_neon-inl.h:4090
HWY_API size_t FindKnownFirstTrue(const Simd< T, N, 0 > d, const Mask128< T, N > mask)
Definition arm_neon-inl.h:5683
HWY_API Vec128< T, N > operator+(Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:580
HWY_API void StoreInterleaved2(const Vec128< T, N > v0, const Vec128< T, N > v1, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:6517
HWY_API Vec128< T, 1 > Reverse(Simd< T, 1, 0 >, const Vec128< T, 1 > v)
Definition arm_neon-inl.h:4030
HWY_API Vec128< uint8_t > operator>>(const Vec128< uint8_t > v, const Vec128< uint8_t > bits)
Definition arm_neon-inl.h:1542
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:2934
HWY_API Vec128< T, 1 > InsertLane(const Vec128< T, 1 > v, size_t i, T t)
Definition arm_neon-inl.h:1225
HWY_API Vec128< T, N > SaturatedSub(Vec128< T, N > a, const Vec128< T, N > b)
Definition emu128-inl.h:608
Simd< T, 1, 0 > Sisd
Definition scalar-inl.h:31
HWY_API Vec128< T, N > ShiftLeft(Vec128< T, N > v)
Definition emu128-inl.h:376
HWY_API Vec128< uint16_t > Broadcast(const Vec128< uint16_t > v)
Definition arm_neon-inl.h:3885
const vfloat64m1_t v
Definition rvv-inl.h:1998
HWY_API Vec128< float > AbsDiff(const Vec128< float > a, const Vec128< float > b)
Definition arm_neon-inl.h:1773
HWY_API void StoreInterleaved3(const Vec128< T, N > v0, const Vec128< T, N > v1, const Vec128< T, N > v2, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:6549
HWY_API VI TableLookupBytesOr0(const V bytes, const VI from)
Definition arm_neon-inl.h:4977
HWY_API Vec128< T, 1 > Compress(Vec128< T, 1 > v, Mask128< T, 1 >)
Definition arm_neon-inl.h:6174
HWY_API Vec128< float, N > NegMulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition arm_neon-inl.h:1861
Definition aligned_allocator.h:27
HWY_API void CopyBytes(const From *from, To *to)
Definition base.h:950
constexpr T MantissaEnd()
Definition base.h:753
HWY_API float F32FromBF16(bfloat16_t bf)
Definition base.h:975
HWY_API bfloat16_t BF16FromF32(float f)
Definition base.h:983
HWY_API constexpr T LimitsMin()
Definition base.h:663
HWY_API constexpr T HighestValue()
Definition base.h:684
constexpr float HighestValue< float >()
Definition base.h:688
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition base.h:593
HWY_API constexpr bool IsSigned()
Definition base.h:642
HWY_API void CopySameSize(const From *HWY_RESTRICT from, To *HWY_RESTRICT to)
Definition base.h:961
constexpr MakeUnsigned< T > ExponentMask()
Definition base.h:740
HWY_API size_t PopCount(uint64_t x)
Definition base.h:865
constexpr float LowestValue< float >()
Definition base.h:675
HWY_API constexpr T LowestValue()
Definition base.h:671
HWY_API constexpr bool IsFloat()
Definition base.h:635
HWY_API constexpr T LimitsMax()
Definition base.h:656
typename detail::Relations< T >::Signed MakeSigned
Definition base.h:595
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()
#define HWY_NAMESPACE
Definition set_macros-inl.h:82
@ value
Definition arm_neon-inl.h:5730
Definition scalar-inl.h:1365
MakeSigned< T > raw
Definition scalar-inl.h:1366
Definition ops/shared-inl.h:52
Definition scalar-inl.h:35
T raw
Definition scalar-inl.h:66
static constexpr size_t kPrivateN
Definition scalar-inl.h:37
HWY_INLINE Vec1 & operator*=(const Vec1 other)
Definition scalar-inl.h:44
Vec1(const Vec1 &)=default
HWY_INLINE Vec1()=default
HWY_INLINE Vec1 & operator^=(const Vec1 other)
Definition scalar-inl.h:62
HWY_INLINE Vec1(const T t)
Definition scalar-inl.h:42
HWY_INLINE Vec1 & operator&=(const Vec1 other)
Definition scalar-inl.h:56
T PrivateT
Definition scalar-inl.h:36
Vec1 & operator=(const Vec1 &)=default
HWY_INLINE Vec1 & operator-=(const Vec1 other)
Definition scalar-inl.h:53
HWY_INLINE Vec1 & operator+=(const Vec1 other)
Definition scalar-inl.h:50
HWY_INLINE Vec1 & operator|=(const Vec1 other)
Definition scalar-inl.h:59
HWY_INLINE Vec1 & operator/=(const Vec1 other)
Definition scalar-inl.h:47
HWY_INLINE Vec1< T > operator()(const Vec1< T > v) const
Definition scalar-inl.h:400
Definition scalar-inl.h:390
HWY_INLINE Vec1< T > operator()(const Vec1< T > v) const
Definition scalar-inl.h:392
uint32_t x1
Definition t1_common.h:75