BitMagic-C++
bmsse4.h File Reference

Compute functions for SSE4.2 SIMD instruction set (internal) More...

#include <mmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <nmmintrin.h>
#include <immintrin.h>
#include "bmdef.h"
#include "bmsse_util.h"
#include "bmutil.h"
Include dependency graph for bmsse4.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces

namespace  bm
 

Macros

#define VECT_XOR_ARR_2_MASK(dst, src, src_end, mask)
 
#define VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask)
 
#define VECT_BITCOUNT(first, last)
 
#define VECT_BITCOUNT_AND(first, last, mask)
 
#define VECT_BITCOUNT_OR(first, last, mask)
 
#define VECT_BITCOUNT_XOR(first, last, mask)
 
#define VECT_BITCOUNT_SUB(first, last, mask)
 
#define VECT_INVERT_BLOCK(first)
 
#define VECT_AND_BLOCK(dst, src)
 
#define VECT_AND_DIGEST(dst, src)
 
#define VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4)
 
#define VECT_AND_DIGEST_2WAY(dst, src1, src2)
 
#define VECT_OR_BLOCK(dst, src)
 
#define VECT_OR_BLOCK_2WAY(dst, src1, src2)
 
#define VECT_OR_BLOCK_3WAY(dst, src1, src2)
 
#define VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4)
 
#define VECT_SUB_BLOCK(dst, src)
 
#define VECT_SUB_DIGEST(dst, src)
 
#define VECT_SUB_DIGEST_2WAY(dst, src1, src2)
 
#define VECT_XOR_BLOCK(dst, src)
 
#define VECT_XOR_BLOCK_2WAY(dst, src1, src2)
 
#define VECT_COPY_BLOCK(dst, src)
 
#define VECT_STREAM_BLOCK(dst, src)
 
#define VECT_SET_BLOCK(dst, value)
 
#define VECT_IS_ZERO_BLOCK(dst)
 
#define VECT_IS_ONE_BLOCK(dst)
 
#define VECT_IS_DIGEST_ZERO(start)
 
#define VECT_BLOCK_SET_DIGEST(dst, val)
 
#define VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to)
 
#define VECT_SHIFT_L1(b, acc, co)
 
#define VECT_SHIFT_R1(b, acc, co)
 
#define VECT_SHIFT_R1_AND(b, co, m, digest)
 
#define VECT_ARR_BLOCK_LOOKUP(idx, size, nb, start)
 
#define VECT_SET_BLOCK_BITS(block, idx, start, stop)
 
#define VECT_BLOCK_CHANGE(block, size)
 
#define VECT_BLOCK_XOR_CHANGE(block, xor_block, size)
 
#define VECT_BLOCK_CHANGE_BC(block, gc, bc)
 
#define VECT_BIT_FIND_FIRST(src, pos)
 
#define VECT_BIT_FIND_DIFF(src1, src2, pos)
 
#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d)
 
#define VECT_GAP_BFIND(buf, pos, is_set)
 

Functions

bm::id_t bm::sse4_bit_count (const __m128i *block, const __m128i *block_end)
 
BMFORCEINLINE unsigned bm::op_xor (unsigned a, unsigned b)
 
BMFORCEINLINE unsigned bm::op_or (unsigned a, unsigned b)
 
BMFORCEINLINE unsigned bm::op_and (unsigned a, unsigned b)
 
template<class Func >
bm::id_t bm::sse4_bit_count_op (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT block_end, const __m128i *BMRESTRICT mask_block, Func sse2_func)
 
bool bm::sse4_is_all_zero (const __m128i *BMRESTRICT block)
 check if block is all zero bits
 
bool bm::sse4_is_digest_zero (const __m128i *BMRESTRICT block)
 check if digest stride is all zero bits
 
void bm::sse4_block_set_digest (__m128i *dst, unsigned value)
 set digest stride to 0xFF.. or 0x0 value
 
unsigned bm::sse4_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 AND blocks: dst &= *src.
 
bool bm::sse4_and_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 AND block digest stride dst &= *src.
 
bool bm::sse4_and_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2)
 AND block digest stride dst = *src1 & *src2.
 
bool bm::sse4_and_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4)
 AND block digest stride.
 
bool bm::sse4_sub_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 SUB (AND NOT) block digest stride dst &= ~*src.
 
bool bm::sse4_sub_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2)
 2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2
 
bool bm::sse4_is_all_one (const __m128i *BMRESTRICT block)
 check if block is all one bits
 
BMFORCEINLINE bool bm::sse42_test_all_one_wave (const void *ptr)
 check if SSE wave is all 0xFFFF...FFF
 
BMFORCEINLINE bool bm::sse42_test_all_zero_wave (const void *ptr)
 check if wave of pointers is all NULL
 
BMFORCEINLINE bool bm::sse42_test_all_zero_wave2 (const void *ptr0, const void *ptr1)
 check if 2 waves of pointers are all NULL
 
BMFORCEINLINE bool bm::sse42_test_all_eq_wave2 (const void *ptr0, const void *ptr1)
 check if 2 waves of pointers are the same (NULL or FULL)
 
unsigned bm::sse42_bit_block_calc_change (const __m128i *BMRESTRICT block, unsigned size)
 
unsigned bm::sse42_bit_block_calc_xor_change (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT xor_block, unsigned size)
 
void bm::sse42_bit_block_calc_change_bc (const __m128i *BMRESTRICT block, unsigned *gc, unsigned *bc)
 
bool bm::sse42_bit_find_first_diff (const __m128i *BMRESTRICT block1, const __m128i *BMRESTRICT block2, unsigned *pos)
 Find first bit which is different between two bit-blocks.
 
bool bm::sse42_bit_find_first (const __m128i *BMRESTRICT block, unsigned *pos)
 Find first non-zero bit.
 
unsigned bm::sse4_gap_find (const bm::gap_word_t *BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size)
 
unsigned bm::sse42_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
 Hybrid binary search, starts as binary, then switches to linear scan.
 
unsigned bm::sse42_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan.
 
int bm::sse42_cmpge_u32 (__m128i vect4, unsigned value)
 Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.
 
unsigned bm::sse4_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to)
 lower bound (greater or equal) linear scan in ascending order sorted array
 
unsigned bm::sse42_idx_arr_block_lookup (const unsigned *idx, unsigned size, unsigned nb, unsigned start)
 
void bm::sse42_set_block_bits (bm::word_t *BMRESTRICT block, const unsigned *BMRESTRICT idx, unsigned start, unsigned stop)
 
void bm::sse4_bit_block_gather_scatter (unsigned *BMRESTRICT arr, const unsigned *BMRESTRICT blk, const unsigned *BMRESTRICT idx, unsigned size, unsigned start, unsigned bit_idx)
 
bool bm::sse42_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1)
 block shift left by 1
 
bool bm::sse42_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1)
 block shift right by 1
 
bool bm::sse42_shift_r1_and (__m128i *block, bm::word_t co1, const __m128i *BMRESTRICT mask_block, bm::id64_t *digest)
 block shift right by 1 plus AND
 
void bm::sse42_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest)
 Build partial XOR product of 2 bit-blocks using digest mask.
 

Detailed Description

Compute functions for SSE4.2 SIMD instruction set (internal)

Definition in file bmsse4.h.

Macro Definition Documentation

◆ VECT_AND_BLOCK

#define VECT_AND_BLOCK ( dst,
src )
Value:
sse4_and_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1737 of file bmsse4.h.

Referenced by bm::bit_block_and().

◆ VECT_AND_DIGEST

#define VECT_AND_DIGEST ( dst,
src )
Value:
sse4_and_digest((__m128i*) dst, (const __m128i*) (src))

Definition at line 1740 of file bmsse4.h.

Referenced by bm::bit_block_and().

◆ VECT_AND_DIGEST_2WAY

#define VECT_AND_DIGEST_2WAY ( dst,
src1,
src2 )
Value:
sse4_and_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1746 of file bmsse4.h.

Referenced by bm::bit_block_and_2way().

◆ VECT_AND_DIGEST_5WAY

#define VECT_AND_DIGEST_5WAY ( dst,
src1,
src2,
src3,
src4 )
Value:
sse4_and_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

Definition at line 1743 of file bmsse4.h.

Referenced by bm::bit_block_and_5way().

◆ VECT_ANDNOT_ARR_2_MASK

#define VECT_ANDNOT_ARR_2_MASK ( dst,
src,
src_end,
mask )
Value:
sse2_andnot_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)
unsigned int word_t
Definition bmconst.h:38

Definition at line 1716 of file bmsse4.h.

Referenced by bm::bit_andnot_arr_ffmask().

◆ VECT_ARR_BLOCK_LOOKUP

#define VECT_ARR_BLOCK_LOOKUP ( idx,
size,
nb,
start )
Value:
sse42_idx_arr_block_lookup(idx, size, nb, start)

Definition at line 1809 of file bmsse4.h.

Referenced by bm::idx_arr_block_lookup_u32().

◆ VECT_BIT_BLOCK_XOR

#define VECT_BIT_BLOCK_XOR ( t,
src,
src_xor,
d )
Value:
sse42_bit_block_xor(t, src, src_xor, d)

Definition at line 1832 of file bmsse4.h.

Referenced by bm::bit_block_xor().

◆ VECT_BIT_FIND_DIFF

#define VECT_BIT_FIND_DIFF ( src1,
src2,
pos )
Value:
sse42_bit_find_first_diff((__m128i*) src1, (__m128i*) (src2), pos)

Definition at line 1829 of file bmsse4.h.

Referenced by bm::bit_find_first_diff().

◆ VECT_BIT_FIND_FIRST

#define VECT_BIT_FIND_FIRST ( src,
pos )
Value:
sse42_bit_find_first((__m128i*) src, pos)

Definition at line 1826 of file bmsse4.h.

Referenced by bm::bit_find_first().

◆ VECT_BITCOUNT

#define VECT_BITCOUNT ( first,
last )
Value:
sse4_bit_count((__m128i*) (first), (__m128i*) (last))

Definition at line 1719 of file bmsse4.h.

Referenced by bm::bit_block_count().

◆ VECT_BITCOUNT_AND

#define VECT_BITCOUNT_AND ( first,
last,
mask )
Value:
sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_and)

Definition at line 1722 of file bmsse4.h.

Referenced by bm::bit_block_and_count().

◆ VECT_BITCOUNT_OR

#define VECT_BITCOUNT_OR ( first,
last,
mask )
Value:
sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_or)

Definition at line 1725 of file bmsse4.h.

Referenced by bm::bit_block_or_count().

◆ VECT_BITCOUNT_SUB

#define VECT_BITCOUNT_SUB ( first,
last,
mask )
Value:
sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_sub)

Definition at line 1731 of file bmsse4.h.

Referenced by bm::bit_block_sub_count().

◆ VECT_BITCOUNT_XOR

#define VECT_BITCOUNT_XOR ( first,
last,
mask )
Value:
sse4_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_xor)

Definition at line 1728 of file bmsse4.h.

Referenced by bm::bit_block_xor_count().

◆ VECT_BLOCK_CHANGE

#define VECT_BLOCK_CHANGE ( block,
size )
Value:
sse42_bit_block_calc_change((__m128i*)block, size)

Definition at line 1815 of file bmsse4.h.

Referenced by bm::bit_block_calc_change(), and bm::compute_complexity_descr().

◆ VECT_BLOCK_CHANGE_BC

#define VECT_BLOCK_CHANGE_BC ( block,
gc,
bc )
Value:
sse42_bit_block_calc_change_bc((__m128i*)block, gc, bc)

Definition at line 1822 of file bmsse4.h.

Referenced by bm::bit_block_change_bc().

◆ VECT_BLOCK_SET_DIGEST

#define VECT_BLOCK_SET_DIGEST ( dst,
val )
Value:
sse4_block_set_digest((__m128i*)dst, val)

Definition at line 1794 of file bmsse4.h.

Referenced by bm::block_init_digest0().

◆ VECT_BLOCK_XOR_CHANGE

#define VECT_BLOCK_XOR_CHANGE ( block,
xor_block,
size )
Value:
sse42_bit_block_calc_xor_change((__m128i*)block, (__m128i*)xor_block, size)

Definition at line 1818 of file bmsse4.h.

Referenced by bm::bit_block_xor_change().

◆ VECT_COPY_BLOCK

#define VECT_COPY_BLOCK ( dst,
src )
Value:
sse2_copy_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1776 of file bmsse4.h.

Referenced by bm::bit_block_copy().

◆ VECT_GAP_BFIND

#define VECT_GAP_BFIND ( buf,
pos,
is_set )
Value:
sse42_gap_bfind(buf, pos, is_set)

Definition at line 1835 of file bmsse4.h.

Referenced by bm::gap_bfind().

◆ VECT_INVERT_BLOCK

#define VECT_INVERT_BLOCK ( first)
Value:
sse2_invert_block((__m128i*)first);

Definition at line 1734 of file bmsse4.h.

Referenced by bm::bit_invert().

◆ VECT_IS_DIGEST_ZERO

#define VECT_IS_DIGEST_ZERO ( start)
Value:
sse4_is_digest_zero((__m128i*)start)

Definition at line 1791 of file bmsse4.h.

Referenced by bm::calc_block_digest0(), and bm::update_block_digest0().

◆ VECT_IS_ONE_BLOCK

#define VECT_IS_ONE_BLOCK ( dst)
Value:
sse4_is_all_one((__m128i*) dst)

Definition at line 1788 of file bmsse4.h.

Referenced by bm::is_bits_one().

◆ VECT_IS_ZERO_BLOCK

#define VECT_IS_ZERO_BLOCK ( dst)
Value:
sse4_is_all_zero((__m128i*) dst)

Definition at line 1785 of file bmsse4.h.

Referenced by bm::bit_is_all_zero().

◆ VECT_LOWER_BOUND_SCAN_U32

#define VECT_LOWER_BOUND_SCAN_U32 ( arr,
target,
from,
to )
Value:
sse4_lower_bound_scan_u32(arr, target, from, to)

Definition at line 1797 of file bmsse4.h.

Referenced by bm::lower_bound_linear_u32().

◆ VECT_OR_BLOCK

#define VECT_OR_BLOCK ( dst,
src )
Value:
sse2_or_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1749 of file bmsse4.h.

Referenced by bm::bit_block_or().

◆ VECT_OR_BLOCK_2WAY

#define VECT_OR_BLOCK_2WAY ( dst,
src1,
src2 )
Value:
sse2_or_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1752 of file bmsse4.h.

Referenced by bm::bit_block_or_2way().

◆ VECT_OR_BLOCK_3WAY

#define VECT_OR_BLOCK_3WAY ( dst,
src1,
src2 )
Value:
sse2_or_block_3way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1755 of file bmsse4.h.

Referenced by bm::bit_block_or_3way().

◆ VECT_OR_BLOCK_5WAY

#define VECT_OR_BLOCK_5WAY ( dst,
src1,
src2,
src3,
src4 )
Value:
sse2_or_block_5way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2), (__m128i*) (src3), (__m128i*) (src4))

Definition at line 1758 of file bmsse4.h.

Referenced by bm::bit_block_or_5way().

◆ VECT_SET_BLOCK

#define VECT_SET_BLOCK ( dst,
value )
Value:
sse2_set_block((__m128i*) dst, value)

Definition at line 1782 of file bmsse4.h.

Referenced by bm::bit_block_set().

◆ VECT_SET_BLOCK_BITS

#define VECT_SET_BLOCK_BITS ( block,
idx,
start,
stop )
Value:
sse42_set_block_bits(block, idx, start, stop)

Definition at line 1812 of file bmsse4.h.

Referenced by bm::set_block_bits_u32().

◆ VECT_SHIFT_L1

#define VECT_SHIFT_L1 ( b,
acc,
co )
Value:
sse42_shift_l1((__m128i*)b, acc, co)

Definition at line 1800 of file bmsse4.h.

Referenced by bm::bit_block_shift_l1_unr().

◆ VECT_SHIFT_R1

#define VECT_SHIFT_R1 ( b,
acc,
co )
Value:
sse42_shift_r1((__m128i*)b, acc, co)

Definition at line 1803 of file bmsse4.h.

Referenced by bm::bit_block_shift_r1_unr().

◆ VECT_SHIFT_R1_AND

#define VECT_SHIFT_R1_AND ( b,
co,
m,
digest )
Value:
sse42_shift_r1_and((__m128i*)b, co, (__m128i*)m, digest)

Definition at line 1806 of file bmsse4.h.

Referenced by bm::bit_block_shift_r1_and_unr().

◆ VECT_STREAM_BLOCK

#define VECT_STREAM_BLOCK ( dst,
src )
Value:
sse2_stream_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1779 of file bmsse4.h.

Referenced by bm::bit_block_stream().

◆ VECT_SUB_BLOCK

#define VECT_SUB_BLOCK ( dst,
src )
Value:
sse2_sub_block((__m128i*) dst, (const __m128i*) (src))

Definition at line 1761 of file bmsse4.h.

Referenced by bm::bit_block_sub().

◆ VECT_SUB_DIGEST

#define VECT_SUB_DIGEST ( dst,
src )
Value:
sse4_sub_digest((__m128i*) dst, (const __m128i*) (src))

Definition at line 1764 of file bmsse4.h.

Referenced by bm::bit_block_sub().

◆ VECT_SUB_DIGEST_2WAY

#define VECT_SUB_DIGEST_2WAY ( dst,
src1,
src2 )
Value:
sse4_sub_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1767 of file bmsse4.h.

Referenced by bm::bit_block_sub_2way().

◆ VECT_XOR_ARR_2_MASK

#define VECT_XOR_ARR_2_MASK ( dst,
src,
src_end,
mask )
Value:
sse2_xor_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

Definition at line 1713 of file bmsse4.h.

◆ VECT_XOR_BLOCK

#define VECT_XOR_BLOCK ( dst,
src )
Value:
sse2_xor_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1770 of file bmsse4.h.

Referenced by bm::bit_block_xor().

◆ VECT_XOR_BLOCK_2WAY

#define VECT_XOR_BLOCK_2WAY ( dst,
src1,
src2 )
Value:
sse2_xor_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1773 of file bmsse4.h.

Referenced by bm::bit_block_xor_2way().