Grok 10.0.5
traits128-inl.h
Go to the documentation of this file.
1// Copyright 2021 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Per-target
17#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
20#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
21#else
22#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
23#endif
24
25#include <string>
26
28#include "hwy/contrib/sort/vqsort.h" // SortDescending
29#include "hwy/highway.h"
30
32namespace hwy {
33namespace HWY_NAMESPACE {
34namespace detail {
35
36#if VQSORT_ENABLED || HWY_IDE
37
38// Highway does not provide a lane type for 128-bit keys, so we use uint64_t
39// along with an abstraction layer for single-lane vs. lane-pair, which is
40// independent of the order.
41struct KeyAny128 {
42 static constexpr bool Is128() { return true; }
43 constexpr size_t LanesPerKey() const { return 2; }
44
45 // What type bench_sort should allocate for generating inputs.
46 using LaneType = uint64_t;
47 // KeyType and KeyString are defined by derived classes.
48
49 HWY_INLINE void Swap(LaneType* a, LaneType* b) const {
50 const FixedTag<LaneType, 2> d;
51 const auto temp = LoadU(d, a);
52 StoreU(LoadU(d, b), d, a);
53 StoreU(temp, d, b);
54 }
55
56 template <class V, class M>
57 HWY_INLINE V CompressKeys(V keys, M mask) const {
58 return CompressBlocksNot(keys, mask);
59 }
60
61 template <class D>
62 HWY_INLINE Vec<D> SetKey(D d, const TFromD<D>* key) const {
63 return LoadDup128(d, key);
64 }
65
66 template <class D>
67 HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
68 return ReverseBlocks(d, v);
69 }
70
71 template <class D>
72 HWY_INLINE Vec<D> ReverseKeys2(D /* tag */, const Vec<D> v) const {
73 return SwapAdjacentBlocks(v);
74 }
75
76 // Only called for 4 keys because we do not support >512-bit vectors.
77 template <class D>
78 HWY_INLINE Vec<D> ReverseKeys4(D d, const Vec<D> v) const {
79 HWY_DASSERT(Lanes(d) <= 64 / sizeof(TFromD<D>));
80 return ReverseKeys(d, v);
81 }
82
83 // Only called for 4 keys because we do not support >512-bit vectors.
84 template <class D>
85 HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
86 const Vec<D> even) const {
87 HWY_DASSERT(Lanes(d) <= 64 / sizeof(TFromD<D>));
88 return ConcatUpperLower(d, odd, even);
89 }
90
91 template <class V>
92 HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
93 return OddEvenBlocks(odd, even);
94 }
95
96 template <class D>
97 HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>) const {
98 HWY_ASSERT(0); // not supported: would require 1024-bit vectors
99 }
100
101 template <class D>
102 HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>) const {
103 HWY_ASSERT(0); // not supported: would require 2048-bit vectors
104 }
105
106 // This is only called for 8/16 col networks (not supported).
107 template <class D>
108 HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>) const {
109 HWY_ASSERT(0);
110 }
111
112 // This is only called for 16 col networks (not supported).
113 template <class D>
114 HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>) const {
115 HWY_ASSERT(0);
116 }
117
118 // This is only called for 8 col networks (not supported).
119 template <class D>
120 HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>) const {
121 HWY_ASSERT(0);
122 }
123};
124
125// Base class shared between OrderAscending128, OrderDescending128.
126struct Key128 : public KeyAny128 {
127 // False indicates the entire key should be compared. KV means key-value.
128 static constexpr bool IsKV() { return false; }
129
130 // What type to pass to Sorter::operator().
131 using KeyType = hwy::uint128_t;
132
133 std::string KeyString() const { return "U128"; }
134
135 template <class D>
136 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
137 return Eq128(d, a, b);
138 }
139
140 template <class D>
141 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
142 return Ne128(d, a, b);
143 }
144
145 // For keys=entire 128 bits, any difference counts.
146 template <class D>
147 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
148 // Must avoid floating-point comparisons (for -0)
149 const RebindToUnsigned<D> du;
150 return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
151 }
152
153 HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
154 return a[0] == b[0] && a[1] == b[1];
155 }
156};
157
158// Anything order-related depends on the key traits *and* the order (see
159// FirstOfLanes). We cannot implement just one Compare function because Lt128
160// only compiles if the lane type is u64. Thus we need either overloaded
161// functions with a tag type, class specializations, or separate classes.
162// We avoid overloaded functions because we want all functions to be callable
163// from a SortTraits without per-function wrappers. Specializing would work, but
164// we are anyway going to specialize at a higher level.
165struct OrderAscending128 : public Key128 {
166 using Order = SortAscending;
167
168 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
169 return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
170 }
171
172 template <class D>
173 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
174 return Lt128(d, a, b);
175 }
176
177 // Used by CompareTop
178 template <class V>
179 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
180 return Lt(a, b);
181 }
182
183 template <class D>
184 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
185 return Min128(d, a, b);
186 }
187
188 template <class D>
189 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
190 return Max128(d, a, b);
191 }
192
193 // Same as for regular lanes because 128-bit lanes are u64.
194 template <class D>
195 HWY_INLINE Vec<D> FirstValue(D d) const {
196 return Set(d, hwy::LowestValue<TFromD<D> >());
197 }
198
199 template <class D>
200 HWY_INLINE Vec<D> LastValue(D d) const {
201 return Set(d, hwy::HighestValue<TFromD<D> >());
202 }
203
204 template <class D>
205 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
206 const Vec<D> k0 = Zero(d);
207 const Vec<D> k1 = OddEven(k0, Set(d, uint64_t{1}));
208 const Mask<D> borrow = Eq(v, k0); // don't-care, lo == 0
209 // lo == 0? 1 : 0, 0
210 const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(borrow, k1));
211 return Sub(Sub(v, k1), adjust);
212 }
213};
214
215struct OrderDescending128 : public Key128 {
216 using Order = SortDescending;
217
218 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
219 return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
220 }
221
222 template <class D>
223 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
224 return Lt128(d, b, a);
225 }
226
227 // Used by CompareTop
228 template <class V>
229 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
230 return Lt(b, a);
231 }
232
233 template <class D>
234 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
235 return Max128(d, a, b);
236 }
237
238 template <class D>
239 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
240 return Min128(d, a, b);
241 }
242
243 // Same as for regular lanes because 128-bit lanes are u64.
244 template <class D>
245 HWY_INLINE Vec<D> FirstValue(D d) const {
246 return Set(d, hwy::HighestValue<TFromD<D> >());
247 }
248
249 template <class D>
250 HWY_INLINE Vec<D> LastValue(D d) const {
251 return Set(d, hwy::LowestValue<TFromD<D> >());
252 }
253
254 template <class D>
255 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
256 const Vec<D> k1 = OddEven(Zero(d), Set(d, uint64_t{1}));
257 const Vec<D> added = Add(v, k1);
258 const Mask<D> overflowed = Lt(added, v); // false, overflowed
259 // overflowed? 1 : 0, 0
260 const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(overflowed, k1));
261 return Add(added, adjust);
262 }
263};
264
265// Base class shared between OrderAscendingKV128, OrderDescendingKV128.
266struct KeyValue128 : public KeyAny128 {
267 // True indicates only part of the key (the more significant lane) should be
268 // compared. KV stands for key-value.
269 static constexpr bool IsKV() { return true; }
270
271 // What type to pass to Sorter::operator().
272 using KeyType = K64V64;
273
274 std::string KeyString() const { return "KV128"; }
275
276 template <class D>
277 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
278 return Eq128Upper(d, a, b);
279 }
280
281 template <class D>
282 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
283 return Ne128Upper(d, a, b);
284 }
285
286 // Only count differences in the actual key, not the value.
287 template <class D>
288 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
289 // Must avoid floating-point comparisons (for -0)
290 const RebindToUnsigned<D> du;
291 const Vec<decltype(du)> zero = Zero(du);
292 const Vec<decltype(du)> keys = OddEven(diff, zero); // clear values
293 return AllTrue(du, Eq(BitCast(du, keys), zero));
294 }
295
296 HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
297 return a[1] == b[1];
298 }
299};
300
301struct OrderAscendingKV128 : public KeyValue128 {
302 using Order = SortAscending;
303
304 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
305 return a[1] < b[1];
306 }
307
308 template <class D>
309 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
310 return Lt128Upper(d, a, b);
311 }
312
313 // Used by CompareTop
314 template <class V>
315 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
316 return Lt(a, b);
317 }
318
319 template <class D>
320 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
321 return Min128Upper(d, a, b);
322 }
323
324 template <class D>
325 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
326 return Max128Upper(d, a, b);
327 }
328
329 // Same as for regular lanes because 128-bit lanes are u64.
330 template <class D>
331 HWY_INLINE Vec<D> FirstValue(D d) const {
332 return Set(d, hwy::LowestValue<TFromD<D> >());
333 }
334
335 template <class D>
336 HWY_INLINE Vec<D> LastValue(D d) const {
337 return Set(d, hwy::HighestValue<TFromD<D> >());
338 }
339
340 template <class D>
341 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
342 const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));
343 return Sub(v, k1);
344 }
345};
346
347struct OrderDescendingKV128 : public KeyValue128 {
348 using Order = SortDescending;
349
350 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
351 return b[1] < a[1];
352 }
353
354 template <class D>
355 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
356 return Lt128Upper(d, b, a);
357 }
358
359 // Used by CompareTop
360 template <class V>
361 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
362 return Lt(b, a);
363 }
364
365 template <class D>
366 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
367 return Max128Upper(d, a, b);
368 }
369
370 template <class D>
371 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
372 return Min128Upper(d, a, b);
373 }
374
375 // Same as for regular lanes because 128-bit lanes are u64.
376 template <class D>
377 HWY_INLINE Vec<D> FirstValue(D d) const {
378 return Set(d, hwy::HighestValue<TFromD<D> >());
379 }
380
381 template <class D>
382 HWY_INLINE Vec<D> LastValue(D d) const {
383 return Set(d, hwy::LowestValue<TFromD<D> >());
384 }
385
386 template <class D>
387 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
388 const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));
389 return Add(v, k1);
390 }
391};
392
393// Shared code that depends on Order.
394template <class Base>
395class Traits128 : public Base {
396 // Special case for >= 256 bit vectors
397#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SVE_256
398 // Returns vector with only the top u64 lane valid. Useful when the next step
399 // is to replicate the mask anyway.
400 template <class D>
401 HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
402 const Base* base = static_cast<const Base*>(this);
403 const Mask<D> eqHL = Eq(a, b);
404 const Vec<D> ltHL = VecFromMask(d, base->CompareLanes(a, b));
405#if HWY_TARGET == HWY_SVE_256
406 return IfThenElse(eqHL, DupEven(ltHL), ltHL);
407#else
408 const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
409 return OrAnd(ltHL, VecFromMask(d, eqHL), ltLX);
410#endif
411 }
412
413 // We want to swap 2 u128, i.e. 4 u64 lanes, based on the 0 or FF..FF mask in
414 // the most-significant of those lanes (the result of CompareTop), so
415 // replicate it 4x. Only called for >= 256-bit vectors.
416 template <class V>
417 HWY_INLINE V ReplicateTop4x(V v) const {
418#if HWY_TARGET == HWY_SVE_256
419 return svdup_lane_u64(v, 3);
420#elif HWY_TARGET <= HWY_AVX3
421 return V{_mm512_permutex_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
422#else // AVX2
423 return V{_mm256_permute4x64_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
424#endif
425 }
426#endif // HWY_TARGET
427
428 public:
429 template <class D>
430 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
431 TFromD<D>* HWY_RESTRICT buf) const {
432 const Base* base = static_cast<const Base*>(this);
433 const size_t N = Lanes(d);
434 Store(v, d, buf);
435 v = base->SetKey(d, buf + 0); // result must be broadcasted
436 for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
437 v = base->First(d, v, base->SetKey(d, buf + i));
438 }
439 return v;
440 }
441
442 template <class D>
443 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
444 TFromD<D>* HWY_RESTRICT buf) const {
445 const Base* base = static_cast<const Base*>(this);
446 const size_t N = Lanes(d);
447 Store(v, d, buf);
448 v = base->SetKey(d, buf + 0); // result must be broadcasted
449 for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
450 v = base->Last(d, v, base->SetKey(d, buf + i));
451 }
452 return v;
453 }
454
455 template <class D>
456 HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
457 const Base* base = static_cast<const Base*>(this);
458
459 const Vec<D> a_copy = a;
460 const auto lt = base->Compare(d, a, b);
461 a = IfThenElse(lt, a, b);
462 b = IfThenElse(lt, b, a_copy);
463 }
464
465 // Conditionally swaps even-numbered lanes with their odd-numbered neighbor.
466 template <class D>
467 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
468 const Base* base = static_cast<const Base*>(this);
469 Vec<D> swapped = base->ReverseKeys2(d, v);
470
471#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SVE_256
472 const Vec<D> select = ReplicateTop4x(CompareTop(d, v, swapped));
473 return IfVecThenElse(select, swapped, v);
474#else
475 Sort2(d, v, swapped);
476 return base->OddEvenKeys(swapped, v);
477#endif
478 }
479
480 // Swaps with the vector formed by reversing contiguous groups of 4 keys.
481 template <class D>
482 HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
483 const Base* base = static_cast<const Base*>(this);
484 Vec<D> swapped = base->ReverseKeys4(d, v);
485
486 // Only specialize for AVX3 because this requires 512-bit vectors.
487#if HWY_TARGET <= HWY_AVX3
488 const Vec512<uint64_t> outHx = CompareTop(d, v, swapped);
489 // Similar to ReplicateTop4x, we want to gang together 2 comparison results
490 // (4 lanes). They are not contiguous, so use permute to replicate 4x.
491 alignas(64) uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
492 const Vec512<uint64_t> select =
493 TableLookupLanes(outHx, SetTableIndices(d, kIndices));
494 return IfVecThenElse(select, swapped, v);
495#else
496 Sort2(d, v, swapped);
497 return base->OddEvenPairs(d, swapped, v);
498#endif
499 }
500
501 // Conditionally swaps lane 0 with 4, 1 with 5 etc.
502 template <class D>
503 HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>) const {
504 // Only used by Merge16, which would require 2048 bit vectors (unsupported).
505 HWY_ASSERT(0);
506 }
507};
508
509#endif // VQSORT_ENABLED
510
511} // namespace detail
512// NOLINTNEXTLINE(google-readability-namespace-comments)
513} // namespace HWY_NAMESPACE
514} // namespace hwy
516
517#endif // HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
uint8_t buf
Definition BitIO.h:84
#define HWY_RESTRICT
Definition base.h:64
#define HWY_INLINE
Definition base.h:70
#define HWY_DASSERT(condition)
Definition base.h:238
#define HWY_MAYBE_UNUSED
Definition base.h:82
#define HWY_ASSERT(condition)
Definition base.h:192
uint32_t a
only used by MQ decoder
Definition mqc.h:48
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:535
HWY_INLINE Vec128< T, N > Sub(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:545
d
Definition rvv-inl.h:1998
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition arm_neon-inl.h:4697
HWY_INLINE Mask128< T, N > Ne128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6685
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition arm_neon-inl.h:5716
HWY_INLINE Mask128< T, N > Eq128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6668
HWY_INLINE Mask128< T, N > Ne128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6677
HWY_INLINE Mask128< T, N > Eq128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6660
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition arm_neon-inl.h:6226
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2047
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition arm_neon-inl.h:2223
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition arm_neon-inl.h:4646
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition arm_neon-inl.h:2253
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition arm_sve-inl.h:243
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition emu128-inl.h:303
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition arm_neon-inl.h:4019
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:2772
HWY_INLINE VFromD< D > Min128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6705
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition arm_neon-inl.h:4704
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6695
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition arm_sve-inl.h:322
HWY_INLINE VFromD< D > Max128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6710
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6623
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:2591
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition arm_neon-inl.h:2040
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition arm_neon-inl.h:4570
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition arm_neon-inl.h:997
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition arm_neon-inl.h:1020
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition arm_neon-inl.h:6700
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition arm_neon-inl.h:4013
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition arm_neon-inl.h:2765
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:4678
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:40
N
Definition rvv-inl.h:1998
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition arm_neon-inl.h:4712
decltype(GetLane(V())) LaneType
Definition generic_ops-inl.h:33
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:2934
HWY_INLINE Mask128< T, N > Lt128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition arm_neon-inl.h:6651
const vfloat64m1_t v
Definition rvv-inl.h:1998
Definition aligned_allocator.h:27
HWY_API constexpr T HighestValue()
Definition base.h:684
HWY_API constexpr T LowestValue()
Definition base.h:671
#define HWY_NAMESPACE
Definition set_macros-inl.h:82
Definition base.h:309
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()