17#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
20#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
22#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
36#if VQSORT_ENABLED || HWY_IDE
// Compile-time flag exposed by this traits type; it is unconditionally true.
// NOTE(review): presumably lets generic sort code tell 128-bit keys apart
// from single-lane keys - confirm against the callers.
42 static constexpr bool Is128() {
return true; }
// Number of lanes occupied by a single key: always 2.
// NOTE(review): the `i += base->LanesPerKey()` loops elsewhere in this
// listing appear to rely on this as their stride - confirm that `Base`
// resolves to a type deriving from this one.
43 constexpr size_t LanesPerKey()
const {
return 2; }
// Swap: takes two lane pointers, `a` and `b`.
// Only the start of the body is visible in this listing: it declares a
// two-lane tag `d` and loads two lanes from `a` into `temp`.
// NOTE(review): the remaining statements are not shown here; presumably they
// load from `b` and store both values crosswise - confirm in the full source.
49 HWY_INLINE void Swap(LaneType*
a, LaneType* b)
const {
// Tag describing exactly two lanes of LaneType.
50 const FixedTag<LaneType, 2>
d;
// Unaligned load (LoadU) of the two lanes starting at `a`.
51 const auto temp =
LoadU(d,
a);
56 template <
class V,
class M>
57 HWY_INLINE V CompressKeys(V keys, M mask)
const {
62 HWY_INLINE Vec<D> SetKey(D d,
const TFromD<D>* key)
const {
67 HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v)
const {
72 HWY_INLINE Vec<D> ReverseKeys2(D ,
const Vec<D> v)
const {
// ReverseKeys4: the visible body simply forwards to ReverseKeys(d, v).
// NOTE(review): listing line 79 is absent from this view, so there may be an
// additional statement (e.g. a precondition check) before the return -
// confirm in the full source.
78 HWY_INLINE Vec<D> ReverseKeys4(D d,
const Vec<D> v)
const {
80 return ReverseKeys(d, v);
85 HWY_INLINE Vec<D> OddEvenPairs(D d,
const Vec<D> odd,
86 const Vec<D> even)
const {
92 HWY_INLINE V OddEvenKeys(
const V odd,
const V even)
const {
97 HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>)
const {
102 HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>)
const {
108 HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>)
const {
114 HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>)
const {
120 HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>)
const {
126struct Key128 :
public KeyAny128 {
// Compile-time flag for Key128: unconditionally false.
// NOTE(review): presumably means "the whole 128 bits are the key, there is no
// attached value"; KeyValue128 in this listing returns true - confirm.
128 static constexpr bool IsKV() {
return false; }
// Returns the fixed label "U128" for this key type.
// NOTE(review): presumably used for diagnostics/benchmark output - confirm.
133 std::string KeyString()
const {
return "U128"; }
136 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D>
a, Vec<D> b)
const {
141 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D>
a, Vec<D> b)
const {
147 HWY_INLINE bool NoKeyDifference(D , Vec<D> diff)
const {
149 const RebindToUnsigned<D> du;
// Scalar equality of two keys given as lane pointers.
// Returns true only if both lanes (index 0 and index 1) of `a` and `b` match,
// i.e. the full two-lane key is compared.
153 HWY_INLINE bool Equal1(
const LaneType*
a,
const LaneType* b)
const {
154 return a[0] == b[0] &&
a[1] == b[1];
165struct OrderAscending128 :
public Key128 {
166 using Order = SortAscending;
// Scalar ordering test for ascending sort: true if key `a` is strictly less
// than key `b`.
// Lane 1 is compared first and lane 0 only breaks ties, so lane 1 acts as the
// more significant half of the two-lane key.
// NOTE(review): unlike Equal1 in this listing, this member is not
// const-qualified, so it cannot be called through a const object or
// `const Base*`. Adding `const` looks backward-compatible - confirm.
168 HWY_INLINE bool Compare1(
const LaneType*
a,
const LaneType* b) {
169 return (
a[1] == b[1]) ?
a[0] < b[0] :
a[1] < b[1];
173 HWY_INLINE Mask<D> Compare(D d, Vec<D>
a, Vec<D> b)
const {
179 HWY_INLINE Mask<DFromV<V> > CompareLanes(V
a, V b)
const {
184 HWY_INLINE Vec<D> First(D d,
const Vec<D>
a,
const Vec<D> b)
const {
189 HWY_INLINE Vec<D> Last(D d,
const Vec<D>
a,
const Vec<D> b)
const {
// Returns `v` with every key decremented by one, propagating a borrow from
// the lower lane of each key into the upper lane.
// NOTE(review): assumes unsigned 64-bit lanes with lane 0 = lower half and
// lane 1 = upper half of each key (consistent with Compare1 and the
// uint64_t{1} constant) - confirm D is always a u64 tag.
205 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
206 const Vec<D> k0 =
Zero(d);
// Highway's OddEven takes odd lanes from its first argument and even lanes
// from its second, so k1 holds 1 in every even (lower) lane and 0 in every
// odd (upper) lane.
207 const Vec<D> k1 =
OddEven(k0,
Set(d, uint64_t{1}));
// Per-lane "is zero" mask. Only the lower-lane results matter because the
// mask is combined with k1 (zero in the upper lanes) below.
208 const Mask<D> borrow = Eq(v, k0);
// 1 in each lower lane that is zero, then moved up one lane so it lines up
// with the upper half of the same key.
210 const Vec<D> adjust = ShiftLeftLanes<1>(
IfThenElseZero(borrow, k1));
// Subtract 1 from the lower half, then the borrow (if any) from the upper.
211 return Sub(
Sub(v, k1), adjust);
215struct OrderDescending128 :
public Key128 {
216 using Order = SortDescending;
// Scalar ordering test for descending sort: true if key `b` is strictly less
// than key `a` (the operands are swapped relative to the ascending variant).
// Lane 1 is compared first and lane 0 only breaks ties.
// NOTE(review): like the ascending Compare1, this member lacks the `const`
// qualifier that Equal1 carries - confirm whether that is intentional.
218 HWY_INLINE bool Compare1(
const LaneType*
a,
const LaneType* b) {
219 return (
a[1] == b[1]) ? b[0] <
a[0] : b[1] <
a[1];
223 HWY_INLINE Mask<D> Compare(D d, Vec<D>
a, Vec<D> b)
const {
229 HWY_INLINE Mask<DFromV<V> > CompareLanes(V
a, V b)
const {
234 HWY_INLINE Vec<D> First(D d,
const Vec<D>
a,
const Vec<D> b)
const {
239 HWY_INLINE Vec<D> Last(D d,
const Vec<D>
a,
const Vec<D> b)
const {
// For descending order the "previous" value in sort order is the next larger
// key, so the visible code adds k1 to `v` and propagates a carry.
// NOTE(review): the definition of `k1` (listing line 256) is not visible
// here; presumably it matches the ascending variant (1 in each lower lane,
// 0 in each upper lane) - confirm in the full source.
255 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
257 const Vec<D> added =
Add(v, k1);
// A lane wrapped around iff the sum is smaller than the original value.
258 const Mask<D> overflowed = Lt(added, v);
// k1 in each lane that wrapped, moved up one lane to act as the carry into
// the neighbouring lane.
260 const Vec<D> adjust = ShiftLeftLanes<1>(
IfThenElseZero(overflowed, k1));
261 return Add(added, adjust);
266struct KeyValue128 :
public KeyAny128 {
// Compile-time flag for KeyValue128: unconditionally true.
// NOTE(review): presumably signals that each 128-bit item is a key plus an
// attached value (cf. `KeyType = K64V64` just below) - confirm.
269 static constexpr bool IsKV() {
return true; }
272 using KeyType = K64V64;
// Returns the fixed label "KV128" for this key/value type.
// NOTE(review): presumably used for diagnostics/benchmark output - confirm.
274 std::string KeyString()
const {
return "KV128"; }
277 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D>
a, Vec<D> b)
const {
282 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D>
a, Vec<D> b)
const {
// NoKeyDifference: takes a per-lane difference vector `diff` (the tag
// argument is unnamed and unused in the visible part).
// Only the setup is visible in this listing; the statement that produces the
// bool result is not shown.
// NOTE(review): presumably returns whether all key lanes of `diff` are zero -
// confirm in the full source.
288 HWY_INLINE bool NoKeyDifference(D , Vec<D> diff)
const {
// Unsigned tag with the same lane layout as D.
290 const RebindToUnsigned<D> du;
291 const Vec<
decltype(du)> zero =
Zero(du);
// Highway's OddEven takes odd lanes from its first argument and even lanes
// from its second: the odd lanes of `diff` are kept and the even lanes are
// cleared.
// NOTE(review): the name `keys` suggests odd lanes hold the key and even
// lanes the value - confirm.
292 const Vec<
decltype(du)> keys =
OddEven(diff, zero);
296 HWY_INLINE bool Equal1(
const LaneType*
a,
const LaneType* b)
const {
301struct OrderAscendingKV128 :
public KeyValue128 {
302 using Order = SortAscending;
304 HWY_INLINE bool Compare1(
const LaneType*
a,
const LaneType* b) {
309 HWY_INLINE Mask<D> Compare(D d, Vec<D>
a, Vec<D> b)
const {
315 HWY_INLINE Mask<DFromV<V> > CompareLanes(V
a, V b)
const {
320 HWY_INLINE Vec<D> First(D d,
const Vec<D>
a,
const Vec<D> b)
const {
325 HWY_INLINE Vec<D> Last(D d,
const Vec<D>
a,
const Vec<D> b)
const {
341 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
347struct OrderDescendingKV128 :
public KeyValue128 {
348 using Order = SortDescending;
350 HWY_INLINE bool Compare1(
const LaneType*
a,
const LaneType* b) {
355 HWY_INLINE Mask<D> Compare(D d, Vec<D>
a, Vec<D> b)
const {
361 HWY_INLINE Mask<DFromV<V> > CompareLanes(V
a, V b)
const {
366 HWY_INLINE Vec<D> First(D d,
const Vec<D>
a,
const Vec<D> b)
const {
371 HWY_INLINE Vec<D> Last(D d,
const Vec<D>
a,
const Vec<D> b)
const {
387 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
395class Traits128 :
public Base {
397#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SVE_256
402 const Base* base =
static_cast<const Base*
>(
this);
403 const Mask<D> eqHL = Eq(
a, b);
404 const Vec<D> ltHL =
VecFromMask(d, base->CompareLanes(
a, b));
405#if HWY_TARGET == HWY_SVE_256
408 const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
418#if HWY_TARGET == HWY_SVE_256
419 return svdup_lane_u64(v, 3);
420#elif HWY_TARGET <= HWY_AVX3
421 return V{_mm512_permutex_epi64(
v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
423 return V{_mm256_permute4x64_epi64(
v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
432 const Base* base =
static_cast<const Base*
>(
this);
433 const size_t N =
Lanes(d);
435 v = base->SetKey(d,
buf + 0);
436 for (
size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
437 v = base->First(d, v, base->SetKey(d,
buf + i));
445 const Base* base =
static_cast<const Base*
>(
this);
446 const size_t N =
Lanes(d);
448 v = base->SetKey(d,
buf + 0);
449 for (
size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
450 v = base->Last(d, v, base->SetKey(d,
buf + i));
456 HWY_INLINE void Sort2(D d, Vec<D>&
a, Vec<D>& b)
const {
457 const Base* base =
static_cast<const Base*
>(
this);
459 const Vec<D> a_copy =
a;
460 const auto lt = base->Compare(d,
a, b);
467 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v)
const {
468 const Base* base =
static_cast<const Base*
>(
this);
469 Vec<D> swapped = base->ReverseKeys2(d, v);
471#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SVE_256
472 const Vec<D> select = ReplicateTop4x(CompareTop(d, v, swapped));
475 Sort2(d, v, swapped);
476 return base->OddEvenKeys(swapped, v);
482 HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v)
const {
483 const Base* base =
static_cast<const Base*
>(
this);
484 Vec<D> swapped = base->ReverseKeys4(d, v);
487#if HWY_TARGET <= HWY_AVX3
488 const Vec512<uint64_t> outHx = CompareTop(d, v, swapped);
491 alignas(64) uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
492 const Vec512<uint64_t> select =
496 Sort2(d, v, swapped);
497 return base->OddEvenPairs(d, swapped, v);
503 HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>)
const {
uint8_t buf
Definition BitIO.h:84
#define HWY_RESTRICT
Definition base.h:64
#define HWY_INLINE
Definition base.h:70
#define HWY_DASSERT(condition)
Definition base.h:238
#define HWY_MAYBE_UNUSED
Definition base.h:82
#define HWY_ASSERT(condition)
Definition base.h:192
uint32_t a
only used by MQ decoder
Definition mqc.h:48
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:535
HWY_INLINE Vec128< T, N > Sub(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:545
d
Definition rvv-inl.h:1998
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition arm_neon-inl.h:4697
HWY_INLINE Mask128< T, N > Ne128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6685
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition arm_neon-inl.h:5716
HWY_INLINE Mask128< T, N > Eq128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6668
HWY_INLINE Mask128< T, N > Ne128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6677
HWY_INLINE Mask128< T, N > Eq128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6660
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition arm_neon-inl.h:6226
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2047
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition arm_neon-inl.h:2223
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition arm_neon-inl.h:4646
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition arm_neon-inl.h:2253
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition arm_sve-inl.h:243
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition emu128-inl.h:303
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition arm_neon-inl.h:4019
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:2772
HWY_INLINE VFromD< D > Min128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6705
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition arm_neon-inl.h:4704
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6695
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition arm_sve-inl.h:322
HWY_INLINE VFromD< D > Max128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6710
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6623
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:2591
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition arm_neon-inl.h:2040
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition arm_neon-inl.h:4570
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition arm_neon-inl.h:997
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition arm_neon-inl.h:1020
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6700
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition arm_neon-inl.h:4013
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition arm_neon-inl.h:2765
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:4678
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:40
N
Definition rvv-inl.h:1998
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition arm_neon-inl.h:4712
decltype(GetLane(V())) LaneType
Definition generic_ops-inl.h:33
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:2934
HWY_INLINE Mask128< T, N > Lt128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6651
const vfloat64m1_t v
Definition rvv-inl.h:1998
Definition aligned_allocator.h:27
HWY_API constexpr T HighestValue()
Definition base.h:684
HWY_API constexpr T LowestValue()
Definition base.h:671
#define HWY_NAMESPACE
Definition set_macros-inl.h:82