BitMagic-C++
|
Compute functions for SSE4.2 SIMD instruction set (internal) More...
#include <mmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <nmmintrin.h>
#include <immintrin.h>
#include "bmdef.h"
#include "bmsse_util.h"
#include "bmutil.h"
Go to the source code of this file.
Namespaces | |
namespace | bm |
Macros | |
#define | VECT_XOR_ARR_2_MASK(dst, src, src_end, mask) |
#define | VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask) |
#define | VECT_BITCOUNT(first, last) |
#define | VECT_BITCOUNT_AND(first, last, mask) |
#define | VECT_BITCOUNT_OR(first, last, mask) |
#define | VECT_BITCOUNT_XOR(first, last, mask) |
#define | VECT_BITCOUNT_SUB(first, last, mask) |
#define | VECT_INVERT_BLOCK(first) |
#define | VECT_AND_BLOCK(dst, src) |
#define | VECT_AND_DIGEST(dst, src) |
#define | VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4) |
#define | VECT_AND_DIGEST_2WAY(dst, src1, src2) |
#define | VECT_OR_BLOCK(dst, src) |
#define | VECT_OR_BLOCK_2WAY(dst, src1, src2) |
#define | VECT_OR_BLOCK_3WAY(dst, src1, src2) |
#define | VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4) |
#define | VECT_SUB_BLOCK(dst, src) |
#define | VECT_SUB_DIGEST(dst, src) |
#define | VECT_SUB_DIGEST_2WAY(dst, src1, src2) |
#define | VECT_XOR_BLOCK(dst, src) |
#define | VECT_XOR_BLOCK_2WAY(dst, src1, src2) |
#define | VECT_COPY_BLOCK(dst, src) |
#define | VECT_STREAM_BLOCK(dst, src) |
#define | VECT_SET_BLOCK(dst, value) |
#define | VECT_IS_ZERO_BLOCK(dst) |
#define | VECT_IS_ONE_BLOCK(dst) |
#define | VECT_IS_DIGEST_ZERO(start) |
#define | VECT_BLOCK_SET_DIGEST(dst, val) |
#define | VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to) |
#define | VECT_SHIFT_L1(b, acc, co) |
#define | VECT_SHIFT_R1(b, acc, co) |
#define | VECT_SHIFT_R1_AND(b, co, m, digest) |
#define | VECT_ARR_BLOCK_LOOKUP(idx, size, nb, start) |
#define | VECT_SET_BLOCK_BITS(block, idx, start, stop) |
#define | VECT_BLOCK_CHANGE(block, size) |
#define | VECT_BLOCK_XOR_CHANGE(block, xor_block, size) |
#define | VECT_BLOCK_CHANGE_BC(block, gc, bc) |
#define | VECT_BIT_FIND_FIRST(src, pos) |
#define | VECT_BIT_FIND_DIFF(src1, src2, pos) |
#define | VECT_BIT_BLOCK_XOR(t, src, src_xor, d) |
#define | VECT_GAP_BFIND(buf, pos, is_set) |
Functions | |
bm::id_t | bm::sse4_bit_count (const __m128i *block, const __m128i *block_end) |
BMFORCEINLINE unsigned | bm::op_xor (unsigned a, unsigned b) |
BMFORCEINLINE unsigned | bm::op_or (unsigned a, unsigned b) |
BMFORCEINLINE unsigned | bm::op_and (unsigned a, unsigned b) |
template<class Func > | |
bm::id_t | bm::sse4_bit_count_op (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT block_end, const __m128i *BMRESTRICT mask_block, Func sse2_func) |
bool | bm::sse4_is_all_zero (const __m128i *BMRESTRICT block) |
check if block is all zero bits | |
bool | bm::sse4_is_digest_zero (const __m128i *BMRESTRICT block) |
check if digest stride is all zero bits | |
void | bm::sse4_block_set_digest (__m128i *dst, unsigned value) |
set digest stride to 0xFF.. or 0x0 value | |
unsigned | bm::sse4_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) |
AND of two blocks: dst &= *src. | |
bool | bm::sse4_and_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) |
AND block digest stride dst &= *src. | |
bool | bm::sse4_and_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) |
AND block digest stride dst = *src1 & *src2. | |
bool | bm::sse4_and_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) |
AND block digest stride. | |
bool | bm::sse4_sub_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) |
SUB (AND NOT) block digest stride dst &= ~*src. | |
bool | bm::sse4_sub_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) |
2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2 | |
bool | bm::sse4_is_all_one (const __m128i *BMRESTRICT block) |
check if block is all one bits | |
BMFORCEINLINE bool | bm::sse42_test_all_one_wave (const void *ptr) |
check if SSE wave is all 0xFFFF...FFF | |
BMFORCEINLINE bool | bm::sse42_test_all_zero_wave (const void *ptr) |
check if wave of pointers is all NULL | |
BMFORCEINLINE bool | bm::sse42_test_all_zero_wave2 (const void *ptr0, const void *ptr1) |
check if 2 waves of pointers are all NULL | |
BMFORCEINLINE bool | bm::sse42_test_all_eq_wave2 (const void *ptr0, const void *ptr1) |
check if 2 waves of pointers are the same (NULL or FULL) | |
unsigned | bm::sse42_bit_block_calc_change (const __m128i *BMRESTRICT block, unsigned size) |
unsigned | bm::sse42_bit_block_calc_xor_change (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT xor_block, unsigned size) |
void | bm::sse42_bit_block_calc_change_bc (const __m128i *BMRESTRICT block, unsigned *gc, unsigned *bc) |
bool | bm::sse42_bit_find_first_diff (const __m128i *BMRESTRICT block1, const __m128i *BMRESTRICT block2, unsigned *pos) |
Find first bit which is different between two bit-blocks. | |
bool | bm::sse42_bit_find_first (const __m128i *BMRESTRICT block, unsigned *pos) |
Find first non-zero bit. | |
unsigned | bm::sse4_gap_find (const bm::gap_word_t *BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size) |
unsigned | bm::sse42_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) |
Hybrid binary search, starts as binary, then switches to linear scan. | |
unsigned | bm::sse42_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos) |
Hybrid binary search, starts as binary, then switches to scan. | |
int | bm::sse42_cmpge_u32 (__m128i vect4, unsigned value) |
Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array. | |
unsigned | bm::sse4_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) |
lower bound (greater or equal) linear scan in ascending order sorted array | |
unsigned | bm::sse42_idx_arr_block_lookup (const unsigned *idx, unsigned size, unsigned nb, unsigned start) |
void | bm::sse42_set_block_bits (bm::word_t *BMRESTRICT block, const unsigned *BMRESTRICT idx, unsigned start, unsigned stop) |
void | bm::sse4_bit_block_gather_scatter (unsigned *BMRESTRICT arr, const unsigned *BMRESTRICT blk, const unsigned *BMRESTRICT idx, unsigned size, unsigned start, unsigned bit_idx) |
bool | bm::sse42_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) |
block shift left by 1 | |
bool | bm::sse42_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) |
block shift right by 1 | |
bool | bm::sse42_shift_r1_and (__m128i *block, bm::word_t co1, const __m128i *BMRESTRICT mask_block, bm::id64_t *digest) |
block shift right by 1 plus AND | |
void | bm::sse42_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) |
Build partial XOR product of 2 bit-blocks using digest mask. | |
Compute functions for SSE4.2 SIMD instruction set (internal)
Definition in file bmsse4.h.
#define VECT_AND_BLOCK | ( | dst, | |
src ) |
Definition at line 1737 of file bmsse4.h.
Referenced by bm::bit_block_and().
#define VECT_AND_DIGEST | ( | dst, | |
src ) |
Definition at line 1740 of file bmsse4.h.
Referenced by bm::bit_block_and().
#define VECT_AND_DIGEST_2WAY | ( | dst, | |
src1, | |||
src2 ) |
Definition at line 1746 of file bmsse4.h.
Referenced by bm::bit_block_and_2way().
#define VECT_AND_DIGEST_5WAY | ( | dst, | |
src1, | |||
src2, | |||
src3, | |||
src4 ) |
Definition at line 1743 of file bmsse4.h.
Referenced by bm::bit_block_and_5way().
#define VECT_ANDNOT_ARR_2_MASK | ( | dst, | |
src, | |||
src_end, | |||
mask ) |
Definition at line 1716 of file bmsse4.h.
Referenced by bm::bit_andnot_arr_ffmask().
#define VECT_ARR_BLOCK_LOOKUP | ( | idx, | |
size, | |||
nb, | |||
start ) |
Definition at line 1809 of file bmsse4.h.
Referenced by bm::idx_arr_block_lookup_u32().
#define VECT_BIT_BLOCK_XOR | ( | t, | |
src, | |||
src_xor, | |||
d ) |
Definition at line 1832 of file bmsse4.h.
Referenced by bm::bit_block_xor().
#define VECT_BIT_FIND_DIFF | ( | src1, | |
src2, | |||
pos ) |
Definition at line 1829 of file bmsse4.h.
Referenced by bm::bit_find_first_diff().
#define VECT_BIT_FIND_FIRST | ( | src, | |
pos ) |
Definition at line 1826 of file bmsse4.h.
Referenced by bm::bit_find_first().
#define VECT_BITCOUNT | ( | first, | |
last ) |
Definition at line 1719 of file bmsse4.h.
Referenced by bm::bit_block_count().
#define VECT_BITCOUNT_AND | ( | first, | |
last, | |||
mask ) |
Definition at line 1722 of file bmsse4.h.
Referenced by bm::bit_block_and_count().
#define VECT_BITCOUNT_OR | ( | first, | |
last, | |||
mask ) |
Definition at line 1725 of file bmsse4.h.
Referenced by bm::bit_block_or_count().
#define VECT_BITCOUNT_SUB | ( | first, | |
last, | |||
mask ) |
Definition at line 1731 of file bmsse4.h.
Referenced by bm::bit_block_sub_count().
#define VECT_BITCOUNT_XOR | ( | first, | |
last, | |||
mask ) |
Definition at line 1728 of file bmsse4.h.
Referenced by bm::bit_block_xor_count().
#define VECT_BLOCK_CHANGE | ( | block, | |
size ) |
Definition at line 1815 of file bmsse4.h.
Referenced by bm::bit_block_calc_change(), and bm::compute_complexity_descr().
#define VECT_BLOCK_CHANGE_BC | ( | block, | |
gc, | |||
bc ) |
Definition at line 1822 of file bmsse4.h.
Referenced by bm::bit_block_change_bc().
#define VECT_BLOCK_SET_DIGEST | ( | dst, | |
val ) |
Definition at line 1794 of file bmsse4.h.
Referenced by bm::block_init_digest0().
#define VECT_BLOCK_XOR_CHANGE | ( | block, | |
xor_block, | |||
size ) |
Definition at line 1818 of file bmsse4.h.
Referenced by bm::bit_block_xor_change().
#define VECT_COPY_BLOCK | ( | dst, | |
src ) |
Definition at line 1776 of file bmsse4.h.
Referenced by bm::bit_block_copy().
#define VECT_GAP_BFIND | ( | buf, | |
pos, | |||
is_set ) |
Definition at line 1835 of file bmsse4.h.
Referenced by bm::gap_bfind().
#define VECT_INVERT_BLOCK | ( | first | ) |
Definition at line 1734 of file bmsse4.h.
Referenced by bm::bit_invert().
#define VECT_IS_DIGEST_ZERO | ( | start | ) |
Definition at line 1791 of file bmsse4.h.
Referenced by bm::calc_block_digest0(), and bm::update_block_digest0().
#define VECT_IS_ONE_BLOCK | ( | dst | ) |
Definition at line 1788 of file bmsse4.h.
Referenced by bm::is_bits_one().
#define VECT_IS_ZERO_BLOCK | ( | dst | ) |
Definition at line 1785 of file bmsse4.h.
Referenced by bm::bit_is_all_zero().
#define VECT_LOWER_BOUND_SCAN_U32 | ( | arr, | |
target, | |||
from, | |||
to ) |
Definition at line 1797 of file bmsse4.h.
Referenced by bm::lower_bound_linear_u32().
#define VECT_OR_BLOCK | ( | dst, | |
src ) |
Definition at line 1749 of file bmsse4.h.
Referenced by bm::bit_block_or().
#define VECT_OR_BLOCK_2WAY | ( | dst, | |
src1, | |||
src2 ) |
Definition at line 1752 of file bmsse4.h.
Referenced by bm::bit_block_or_2way().
#define VECT_OR_BLOCK_3WAY | ( | dst, | |
src1, | |||
src2 ) |
Definition at line 1755 of file bmsse4.h.
Referenced by bm::bit_block_or_3way().
#define VECT_OR_BLOCK_5WAY | ( | dst, | |
src1, | |||
src2, | |||
src3, | |||
src4 ) |
Definition at line 1758 of file bmsse4.h.
Referenced by bm::bit_block_or_5way().
#define VECT_SET_BLOCK | ( | dst, | |
value ) |
Definition at line 1782 of file bmsse4.h.
Referenced by bm::bit_block_set().
#define VECT_SET_BLOCK_BITS | ( | block, | |
idx, | |||
start, | |||
stop ) |
Definition at line 1812 of file bmsse4.h.
Referenced by bm::set_block_bits_u32().
#define VECT_SHIFT_L1 | ( | b, | |
acc, | |||
co ) |
Definition at line 1800 of file bmsse4.h.
Referenced by bm::bit_block_shift_l1_unr().
#define VECT_SHIFT_R1 | ( | b, | |
acc, | |||
co ) |
Definition at line 1803 of file bmsse4.h.
Referenced by bm::bit_block_shift_r1_unr().
#define VECT_SHIFT_R1_AND | ( | b, | |
co, | |||
m, | |||
digest ) |
Definition at line 1806 of file bmsse4.h.
Referenced by bm::bit_block_shift_r1_and_unr().
#define VECT_STREAM_BLOCK | ( | dst, | |
src ) |
Definition at line 1779 of file bmsse4.h.
Referenced by bm::bit_block_stream().
#define VECT_SUB_BLOCK | ( | dst, | |
src ) |
Definition at line 1761 of file bmsse4.h.
Referenced by bm::bit_block_sub().
#define VECT_SUB_DIGEST | ( | dst, | |
src ) |
Definition at line 1764 of file bmsse4.h.
Referenced by bm::bit_block_sub().
#define VECT_SUB_DIGEST_2WAY | ( | dst, | |
src1, | |||
src2 ) |
Definition at line 1767 of file bmsse4.h.
Referenced by bm::bit_block_sub_2way().
#define VECT_XOR_ARR_2_MASK | ( | dst, | |
src, | |||
src_end, | |||
mask ) |
#define VECT_XOR_BLOCK | ( | dst, | |
src ) |
Definition at line 1770 of file bmsse4.h.
Referenced by bm::bit_block_xor().
#define VECT_XOR_BLOCK_2WAY | ( | dst, | |
src1, | |||
src2 ) |
Definition at line 1773 of file bmsse4.h.
Referenced by bm::bit_block_xor_2way().