16#ifndef HIGHWAY_HWY_BASE_H_
17#define HIGHWAY_HWY_BASE_H_
37#define HWY_STR_IMPL(macro) #macro
38#define HWY_STR(macro) HWY_STR_IMPL(macro)
44#define HWY_RESTRICT __restrict
45#define HWY_INLINE __forceinline
46#define HWY_NOINLINE __declspec(noinline)
48#define HWY_NORETURN __declspec(noreturn)
49#define HWY_LIKELY(expr) (expr)
50#define HWY_UNLIKELY(expr) (expr)
51#define HWY_PRAGMA(tokens) __pragma(tokens)
52#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
53#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
54#define HWY_MAYBE_UNUSED
55#define HWY_HAS_ASSUME_ALIGNED 0
57#define HWY_MUST_USE_RESULT _Check_return_
59#define HWY_MUST_USE_RESULT
64#define HWY_RESTRICT __restrict__
68#define HWY_INLINE inline __attribute__((always_inline))
70#define HWY_INLINE inline
72#define HWY_NOINLINE __attribute__((noinline))
73#define HWY_FLATTEN __attribute__((flatten))
74#define HWY_NORETURN __attribute__((noreturn))
75#define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
76#define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
77#define HWY_PRAGMA(tokens) _Pragma(#tokens)
78#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
79#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
82#define HWY_MAYBE_UNUSED __attribute__((unused))
83#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
91#if HWY_HAS_ATTRIBUTE(__format__)
92#define HWY_FORMAT(idx_fmt, idx_arg) \
93 __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
95#define HWY_FORMAT(idx_fmt, idx_arg)
103#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
104#define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
106#define HWY_ASSUME_ALIGNED(ptr, align) (ptr)
112#if HWY_COMPILER_CLANG
113#define HWY_PUSH_ATTRIBUTES(targets_str) \
114 HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
115 apply_to = function))
116#define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
117#elif HWY_COMPILER_GCC
118#define HWY_PUSH_ATTRIBUTES(targets_str) \
119 HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
120#define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
122#define HWY_PUSH_ATTRIBUTES(targets_str)
123#define HWY_POP_ATTRIBUTES
129#define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
131#define HWY_CONCAT_IMPL(a, b) a##b
132#define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
134#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
135#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
137#if HWY_COMPILER_GCC_ACTUAL
139#define HWY_UNROLL(factor) HWY_PRAGMA(GCC unroll factor)
140#define HWY_DEFAULT_UNROLL HWY_UNROLL(4)
141#elif HWY_COMPILER_CLANG || HWY_COMPILER_ICC || HWY_COMPILER_ICX
142#define HWY_UNROLL(factor) HWY_PRAGMA(unroll factor)
143#define HWY_DEFAULT_UNROLL HWY_UNROLL()
145#define HWY_UNROLL(factor)
146#define HWY_DEFAULT_UNROLL
159#if defined(__has_cpp_attribute) && __has_cpp_attribute(assume)
160#define HWY_ASSUME(expr) [[assume(expr)]]
161#elif HWY_COMPILER_MSVC || HWY_COMPILER_ICC
162#define HWY_ASSUME(expr) __assume(expr)
164#elif HWY_COMPILER_CLANG && HWY_HAS_BUILTIN(__builtin_assume)
165#define HWY_ASSUME(expr) __builtin_assume(expr)
168#elif HWY_COMPILER_GCC_ACTUAL >= 405
169#define HWY_ASSUME(expr) \
170 ((expr) ? static_cast<void>(0) : __builtin_unreachable())
172#define HWY_ASSUME(expr) static_cast<void>(0)
179#define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
186#define HWY_REP4(literal) literal, literal, literal, literal
188#define HWY_ABORT(format, ...) \
189 ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
192#define HWY_ASSERT(condition) \
194 if (!(condition)) { \
195 HWY_ABORT("Assert %s", #condition); \
199#if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
205#if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER)
211#if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER)
220#define HWY_ATTR_NO_MSAN __attribute__((no_sanitize_memory))
222#define HWY_ATTR_NO_MSAN
226#if !defined(HWY_IS_DEBUG_BUILD)
229#if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
230 HWY_IS_MSAN || HWY_IS_TSAN || defined(__clang_analyzer__)
231#define HWY_IS_DEBUG_BUILD 1
233#define HWY_IS_DEBUG_BUILD 0
237#if HWY_IS_DEBUG_BUILD
238#define HWY_DASSERT(condition) HWY_ASSERT(condition)
240#define HWY_DASSERT(condition) \
252#elif HWY_ARCH_RVV && defined(__riscv_vector)
266#define HWY_ALIGN_MAX alignas(64)
267#elif HWY_ARCH_RVV && defined(__riscv_vector)
268#define HWY_ALIGN_MAX alignas(8)
270#define HWY_ALIGN_MAX alignas(16)
283#if ((HWY_ARCH_ARM_A64 || (__ARM_FP & 2)) && HWY_COMPILER_GCC)
284using float16_t = __fp16;
287#elif HWY_ARCH_RVV && HWY_COMPILER_CLANG && defined(__riscv_zvfh)
288using float16_t = _Float16;
332 return (
a.hi == b.
hi) ?
a.lo < b.
lo :
a.hi < b.
hi;
341 return a.
lo == b.
lo &&
a.hi == b.
hi;
346 return a.key < b.
key;
360 return a.key < b.
key;
375template <
bool Condition>
382template <
bool Condition>
385template <
typename T,
typename U>
395template <
typename T,
typename U>
406#define HWY_IF_LE128(T, N) hwy::EnableIf<N * sizeof(T) <= 16>* = nullptr
407#define HWY_IF_LE64(T, N) hwy::EnableIf<N * sizeof(T) <= 8>* = nullptr
408#define HWY_IF_LE32(T, N) hwy::EnableIf<N * sizeof(T) <= 4>* = nullptr
409#define HWY_IF_GE32(T, N) hwy::EnableIf<N * sizeof(T) >= 4>* = nullptr
410#define HWY_IF_GE64(T, N) hwy::EnableIf<N * sizeof(T) >= 8>* = nullptr
411#define HWY_IF_GE128(T, N) hwy::EnableIf<N * sizeof(T) >= 16>* = nullptr
412#define HWY_IF_GT128(T, N) hwy::EnableIf<(N * sizeof(T) > 16)>* = nullptr
414#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!IsSigned<T>()>* = nullptr
415#define HWY_IF_SIGNED(T) \
416 hwy::EnableIf<IsSigned<T>() && !IsFloat<T>()>* = nullptr
417#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
418#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr
420#define HWY_IF_LANE_SIZE(T, bytes) \
421 hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
422#define HWY_IF_NOT_LANE_SIZE(T, bytes) \
423 hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
427#define HWY_IF_LANE_SIZE_ONE_OF(T, bit_array) \
428 hwy::EnableIf<((size_t{1} << sizeof(T)) & (bit_array)) != 0>* = nullptr
430#define HWY_IF_LANES_PER_BLOCK(T, N, LANES) \
431 hwy::EnableIf<HWY_MIN(sizeof(T) * N, 16) / sizeof(T) == (LANES)>* = nullptr
461 enum { is_signed = 0, is_float = 0 };
468 enum { is_signed = 1, is_float = 0 };
476 enum { is_signed = 0, is_float = 0 };
484 enum { is_signed = 1, is_float = 0 };
493 enum { is_signed = 0, is_float = 0 };
502 enum { is_signed = 1, is_float = 0 };
511 enum { is_signed = 0, is_float = 0 };
519 enum { is_signed = 1, is_float = 0 };
525 enum { is_signed = 0, is_float = 0 };
533 enum { is_signed = 1, is_float = 1 };
540 enum { is_signed = 1, is_float = 1 };
549 enum { is_signed = 1, is_float = 1 };
557 enum { is_signed = 1, is_float = 1 };
618template <
typename T,
class R = detail::Relations<T>>
626template <
typename T,
class R = detail::Relations<T>>
657 static_assert(!
IsFloat<T>(),
"Only for integer types");
659 return static_cast<T
>(
IsSigned<T>() ? (
static_cast<TU
>(~0ull) >> 1)
660 :
static_cast<TU
>(~0ull));
664 static_assert(!
IsFloat<T>(),
"Only for integer types");
676 return -3.402823466e+38F;
680 return -1.7976931348623158e+308;
689 return 3.402823466e+38F;
693 return 1.7976931348623158e+308;
703 return 1.192092896e-7f;
707 return 2.2204460492503131e-16;
713 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
754 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
764 return 4503599627370496.0;
785template <
typename T1,
typename T2>
787 return (
a + b - 1) / b;
791constexpr inline size_t RoundUpTo(
size_t what,
size_t align) {
792 return DivCeil(what, align) * align;
799 _BitScanForward(&index,
x);
802 return static_cast<size_t>(__builtin_ctz(
x));
810 _BitScanForward64(&index,
x);
814 uint32_t lsb =
static_cast<uint32_t
>(
x & 0xFFFFFFFF);
817 uint32_t msb =
static_cast<uint32_t
>(
x >> 32u);
818 _BitScanForward(&index, msb);
821 _BitScanForward(&index, lsb);
826 return static_cast<size_t>(__builtin_ctzll(
x));
834 _BitScanReverse(&index,
x);
837 return static_cast<size_t>(__builtin_clz(
x));
845 _BitScanReverse64(&index,
x);
849 const uint32_t msb =
static_cast<uint32_t
>(
x >> 32u);
852 const uint32_t lsb =
static_cast<uint32_t
>(
x & 0xFFFFFFFF);
853 _BitScanReverse(&index, lsb);
856 _BitScanReverse(&index, msb);
861 return static_cast<size_t>(__builtin_clzll(
x));
867 return static_cast<size_t>(__builtin_popcountll(
x));
872#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 && defined(__AVX__)
873 return _mm_popcnt_u64(
x);
874#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 && defined(__AVX__)
875 return _mm_popcnt_u32(
static_cast<uint32_t
>(
x & 0xFFFFFFFFu)) +
876 _mm_popcnt_u32(
static_cast<uint32_t
>(
x >> 32));
878 x -= ((
x >> 1) & 0x5555555555555555ULL);
879 x = (((
x >> 2) & 0x3333333333333333ULL) + (
x & 0x3333333333333333ULL));
880 x = (((
x >> 4) +
x) & 0x0F0F0F0F0F0F0F0FULL);
884 return static_cast<size_t>(
x & 0x7Fu);
891template <
typename TI>
895 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(
x >> 1)) + 1);
898template <
typename TI>
902 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(
x - 1)) + 1);
907 return t +
static_cast<T
>(n);
914 return static_cast<T
>(
915 static_cast<TU
>(
static_cast<TU
>(t) +
static_cast<TU
>(n)) &
919#if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
920#pragma intrinsic(_umul128)
925#if defined(__SIZEOF_INT128__)
926 __uint128_t product = (__uint128_t)
a * (__uint128_t)b;
927 *upper = (uint64_t)(product >> 64);
928 return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
929#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
930 return _umul128(
a, b, upper);
932 constexpr uint64_t kLo32 = 0xFFFFFFFFU;
933 const uint64_t lo_lo = (
a & kLo32) * (b & kLo32);
934 const uint64_t hi_lo = (
a >> 32) * (b & kLo32);
935 const uint64_t lo_hi = (
a & kLo32) * (b >> 32);
936 const uint64_t hi_hi = (
a >> 32) * (b >> 32);
937 const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
938 *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
939 return (t << 32) | (lo_lo & kLo32);
944#pragma intrinsic(memcpy)
945#pragma intrinsic(memset)
949template <
size_t kBytes,
typename From,
typename To>
952 memcpy(to, from, kBytes);
955 static_cast<void*
>(to),
static_cast<const void*
>(from), kBytes);
960template <
typename From,
typename To>
962 static_assert(
sizeof(From) ==
sizeof(To),
"");
966template <
size_t kBytes,
typename To>
969 memset(to, 0, kBytes);
971 __builtin_memset(to, 0, kBytes);
987 bf.
bits =
static_cast<uint16_t
>(
bits >> 16);
uint32_t x
Definition BlockExec.h:38
uint8_t * bits
Definition TileProcessor.h:59
#define HWY_RESTRICT
Definition base.h:64
#define HWY_NORETURN
Definition base.h:74
#define HWY_FORMAT(idx_fmt, idx_arg)
Definition base.h:95
#define HWY_API
Definition base.h:129
#define HWY_INLINE
Definition base.h:70
#define HWY_MAYBE_UNUSED
Definition base.h:82
#define HWY_DLLEXPORT
Definition highway_export.h:13
uint32_t a
only used by MQ decoder
Definition mqc.h:48
Definition aligned_allocator.h:27
double float64_t
Definition base.h:303
HWY_API void CopyBytes(const From *from, To *to)
Definition base.h:950
HWY_INLINE constexpr T AddWithWraparound(hwy::FloatTag, T t, size_t n)
Definition base.h:906
constexpr T MantissaEnd()
Definition base.h:753
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x)
Definition base.h:806
constexpr MakeSigned< T > MaxExponentTimes2()
Definition base.h:728
constexpr MakeUnsigned< T > MantissaMask()
Definition base.h:746
typename RemoveConstT< T >::type RemoveConst
Definition base.h:447
HWY_API constexpr T Epsilon()
Definition base.h:698
HWY_API float F32FromBF16(bfloat16_t bf)
Definition base.h:975
HWY_API void ZeroBytes(To *to)
Definition base.h:967
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t *HWY_RESTRICT upper)
Definition base.h:924
HWY_API bfloat16_t BF16FromF32(float f)
Definition base.h:983
HWY_API constexpr T LimitsMin()
Definition base.h:663
HWY_API constexpr T HighestValue()
Definition base.h:684
constexpr float HighestValue< float >()
Definition base.h:688
constexpr T1 DivCeil(T1 a, T2 b)
Definition base.h:786
constexpr float MantissaEnd< float >()
Definition base.h:758
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition base.h:593
static HWY_MAYBE_UNUSED bool operator==(const uint128_t &a, const uint128_t &b)
Definition base.h:339
HWY_API constexpr bool IsSame()
Definition base.h:396
constexpr bool IsSigned< bfloat16_t >()
Definition base.h:650
HWY_API constexpr bool IsSigned()
Definition base.h:642
HWY_API void CopySameSize(const From *HWY_RESTRICT from, To *HWY_RESTRICT to)
Definition base.h:961
constexpr size_t FloorLog2(TI x)
Definition base.h:892
constexpr MakeUnsigned< T > ExponentMask()
Definition base.h:740
typename detail::Relations< T >::Float MakeFloat
Definition base.h:597
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero32(const uint32_t x)
Definition base.h:831
constexpr bool IsSigned< float16_t >()
Definition base.h:646
typename detail::TypeFromSize< N >::Unsigned UnsignedFromSize
Definition base.h:607
constexpr double HighestValue< double >()
Definition base.h:692
constexpr auto IsFloatTag() -> hwy::SizeTag<(R::is_float ? 0x200 :0x400)>
Definition base.h:627
constexpr int MantissaBits< double >()
Definition base.h:721
constexpr auto TypeTag() -> hwy::SizeTag<((R::is_signed+R::is_float)<< 8)>
Definition base.h:619
typename detail::TypeFromSize< N >::Signed SignedFromSize
Definition base.h:609
static HWY_MAYBE_UNUSED bool operator>(const uint128_t &a, const uint128_t &b)
Definition base.h:335
HWY_API size_t PopCount(uint64_t x)
Definition base.h:865
constexpr double MantissaEnd< double >()
Definition base.h:762
float float32_t
Definition base.h:302
constexpr int MantissaBits()
Definition base.h:712
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition base.h:796
constexpr float LowestValue< float >()
Definition base.h:675
constexpr MakeSigned< T > MaxExponentField()
Definition base.h:778
constexpr size_t CeilLog2(TI x)
Definition base.h:899
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero64(const uint64_t x)
Definition base.h:841
typename detail::TypeFromSize< N >::Float FloatFromSize
Definition base.h:611
constexpr MakeUnsigned< T > SignMask()
Definition base.h:734
constexpr double LowestValue< double >()
Definition base.h:679
static HWY_MAYBE_UNUSED bool operator<(const uint128_t &a, const uint128_t &b)
Definition base.h:330
HWY_API constexpr T LowestValue()
Definition base.h:671
constexpr float Epsilon< float >()
Definition base.h:702
typename EnableIfT< Condition >::type EnableIf
Definition base.h:383
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize
Definition base.h:256
typename detail::Relations< T >::Narrow MakeNarrow
Definition base.h:603
HWY_API constexpr bool IsFloat()
Definition base.h:635
constexpr int MantissaBits< float >()
Definition base.h:717
constexpr double Epsilon< double >()
Definition base.h:706
HWY_DLLEXPORT HWY_NORETURN void int const char * format
Definition base.h:992
HWY_DLLEXPORT HWY_NORETURN void int line
Definition base.h:992
HWY_API constexpr T LimitsMax()
Definition base.h:656
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition base.h:791
typename detail::Relations< T >::Wide MakeWide
Definition base.h:601
constexpr int ExponentBits()
Definition base.h:769
typename detail::Relations< T >::Signed MakeSigned
Definition base.h:595
void type
Definition base.h:379
@ value
Definition base.h:387
uint32_t value
Definition base.h:324
uint32_t key
Definition base.h:325
uint64_t value
Definition base.h:317
uint64_t key
Definition base.h:318
T type
Definition base.h:443
T type
Definition base.h:439
uint16_t bits
Definition base.h:297
int16_t Signed
Definition base.h:538
float Wide
Definition base.h:539
uint16_t Unsigned
Definition base.h:537
double Float
Definition base.h:555
uint64_t Unsigned
Definition base.h:553
int64_t Signed
Definition base.h:554
float Narrow
Definition base.h:556
int16_t Signed
Definition base.h:530
float Wide
Definition base.h:532
uint16_t Unsigned
Definition base.h:529
uint32_t Unsigned
Definition base.h:544
double Wide
Definition base.h:547
float Float
Definition base.h:546
int32_t Signed
Definition base.h:545
uint16_t Unsigned
Definition base.h:480
int16_t Signed
Definition base.h:481
int32_t Wide
Definition base.h:482
int8_t Narrow
Definition base.h:483
uint32_t Unsigned
Definition base.h:497
int64_t Wide
Definition base.h:500
float Float
Definition base.h:499
int16_t Narrow
Definition base.h:501
int32_t Signed
Definition base.h:498
int32_t Narrow
Definition base.h:518
double Float
Definition base.h:517
uint64_t Unsigned
Definition base.h:515
int64_t Signed
Definition base.h:516
int16_t Wide
Definition base.h:467
int8_t Signed
Definition base.h:466
uint8_t Unsigned
Definition base.h:465
uint64_t Narrow
Definition base.h:524
uint8_t Narrow
Definition base.h:475
int16_t Signed
Definition base.h:473
uint32_t Wide
Definition base.h:474
uint16_t Unsigned
Definition base.h:472
uint32_t Unsigned
Definition base.h:488
uint64_t Wide
Definition base.h:491
uint16_t Narrow
Definition base.h:492
float Float
Definition base.h:490
int32_t Signed
Definition base.h:489
uint32_t Narrow
Definition base.h:510
int64_t Signed
Definition base.h:507
uint64_t Unsigned
Definition base.h:506
double Float
Definition base.h:508
int8_t Signed
Definition base.h:459
uint8_t Unsigned
Definition base.h:458
uint16_t Wide
Definition base.h:460
int8_t Signed
Definition base.h:565
uint8_t Unsigned
Definition base.h:564
int16_t Signed
Definition base.h:570
uint16_t Unsigned
Definition base.h:569
int32_t Signed
Definition base.h:575
uint32_t Unsigned
Definition base.h:574
float Float
Definition base.h:576
double Float
Definition base.h:582
int64_t Signed
Definition base.h:581
uint64_t Unsigned
Definition base.h:580
uint16_t bits
Definition base.h:292
uint64_t lo
Definition base.h:310
uint64_t hi
Definition base.h:311