1#ifndef BMENCODING_H__INCLUDED__
2#define BMENCODING_H__INCLUDED__
32#pragma warning (disable : 4702)
74 unsigned char* start_;
161 bool get_32_OR(
bm::word_t* w,
unsigned count);
162 void get_32_AND(
bm::word_t* w,
unsigned count);
173template<
class TEncoder>
178 : dest_(dest), used_bits_(0), accum_(0)
187 void put_bits(
unsigned value,
unsigned count)
BMNOEXCEPT;
193 void put_zero_bits(
unsigned count)
BMNOEXCEPT;
202 bic_encode_u16_cm(arr, sz, lo, hi);
218 void bic_encode_u32_cm(
const bm::word_t* arr,
unsigned sz,
227 dest_.put_32(accum_);
228 used_bits_ = accum_ = 0;
231 bit_out(
const bit_out&);
232 bit_out& operator=(
const bit_out&);
247template<
class TDecoder>
253 used_bits_(
unsigned(
sizeof(accum_) * 8)),
267 bic_decode_u16_cm(arr, sz, lo, hi);
273 bic_decode_u16_cm_bitset(block, sz, lo, hi);
278 bic_decode_u16_cm_dry(sz, lo, hi);
290 void bic_decode_u32_cm(
bm::word_t* arr,
unsigned sz,
295 void bic_decode_u16_rg_bitset(
bm::word_t* block,
unsigned sz,
299 void bic_decode_u16_rg_dry(
unsigned sz,
303 void bic_decode_u16_cm_bitset(
bm::word_t* block,
unsigned sz,
308 void bic_decode_u16_cm_dry(
unsigned sz,
325template<
typename T,
typename TBitIO>
346template<
typename T,
typename TBitIO>
385: buf_(buf), start_(buf)
432#if (BM_UNALIGNED_ACCESS_OK == 1)
436 *buf_++ = (
unsigned char) s;
438 *buf_++ = (
unsigned char) s;
447#if (BM_UNALIGNED_ACCESS_OK == 1)
451 unsigned char* buf = buf_;
456 unsigned char a = (
unsigned char) w16;
457 unsigned char b = (
unsigned char) (w16 >> 8);
464 buf_ = (
unsigned char*)buf;
474 BM_ASSERT((buf_ + count) < (start_ + size_));
487 return size_t(buf_ - start_);
515 buf_[0] = (
unsigned char)w;
516 buf_[1] = (
unsigned char)(w >> 8);
517 buf_[2] = (
unsigned char)(w >> 16);
529#if (BM_UNALIGNED_ACCESS_OK == 1)
533 *buf_++ = (
unsigned char) w;
534 *buf_++ = (
unsigned char) (w >> 8);
535 *buf_++ = (
unsigned char) (w >> 16);
536 *buf_++ = (
unsigned char) (w >> 24);
547 BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
548 *buf_++ = (
unsigned char)w;
549 *buf_++ = (
unsigned char)(w >> 8);
550 *buf_++ = (
unsigned char)(w >> 16);
551 *buf_++ = (
unsigned char)(w >> 24);
552 *buf_++ = (
unsigned char)(w >> 32);
553 *buf_++ = (
unsigned char)(w >> 40);
564#if (BM_UNALIGNED_ACCESS_OK == 1)
568 *buf_++ = (
unsigned char) w;
569 *buf_++ = (
unsigned char) (w >> 8);
570 *buf_++ = (
unsigned char) (w >> 16);
571 *buf_++ = (
unsigned char) (w >> 24);
572 *buf_++ = (
unsigned char) (w >> 32);
573 *buf_++ = (
unsigned char) (w >> 40);
574 *buf_++ = (
unsigned char) (w >> 48);
575 *buf_++ = (
unsigned char) (w >> 56);
585#if (BM_UNALIGNED_ACCESS_OK == 1)
590 unsigned char* buf = buf_;
595 unsigned char a = (
unsigned char) w32;
596 unsigned char b = (
unsigned char) (w32 >> 8);
597 unsigned char c = (
unsigned char) (w32 >> 16);
598 unsigned char d = (
unsigned char) (w32 >> 24);
606 buf_ = (
unsigned char*)buf;
641#if (BM_UNALIGNED_ACCESS_OK == 1)
657 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
658 ((
unsigned)buf_[2] << 16);
670#if (BM_UNALIGNED_ACCESS_OK == 1)
674 bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
675 ((
unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
705#if (BM_UNALIGNED_ACCESS_OK == 1)
736#if (BM_UNALIGNED_ACCESS_OK == 1)
741 const unsigned char* buf = buf_;
745 bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
746 ((
unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
750 buf_ = (
unsigned char*)buf;
768#if defined(BMAVX2OPT)
769 __m256i* buf_start = (__m256i*)buf_;
771 __m256i* buf_end = (__m256i*)buf_;
773 return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
774#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
775 __m128i* buf_start = (__m128i*)buf_;
777 __m128i* buf_end = (__m128i*)buf_;
784 for (
unsigned i = 0; i < count; i+=4)
786 acc &= (w[i+0] |= get_32());
787 acc &= (w[i+1] |= get_32());
788 acc &= (w[i+2] |= get_32());
789 acc &= (w[i+3] |= get_32());
791 return acc == not_acc;
809#if defined(BMAVX2OPT)
810 __m256i* buf_start = (__m256i*)buf_;
812 __m256i* buf_end = (__m256i*)buf_;
814 bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
815#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
816 __m128i* buf_start = (__m128i*)buf_;
818 __m128i* buf_end = (__m128i*)buf_;
822 for (
unsigned i = 0; i < count; i+=4)
847#if (BM_UNALIGNED_ACCESS_OK == 1)
851 const unsigned char* buf = buf_;
859 buf_ = (
unsigned char*)buf;
887 ((
unsigned)
buf_[2] << 16);
896 ((unsigned)
buf_[2] << 8) + ((
unsigned)
buf_[3]);
938 const unsigned char* buf =
buf_;
942 bm::word_t a = ((unsigned)buf[0] << 24)+ ((
unsigned)buf[1] << 16) +
943 ((unsigned)buf[2] << 8) + ((
unsigned)buf[3]);
947 buf_ = (
unsigned char*)buf;
962 for (
unsigned i = 0; i < count; i+=4)
964 acc &= (w[i+0] |=
get_32());
965 acc &= (w[i+1] |=
get_32());
966 acc &= (w[i+2] |=
get_32());
967 acc &= (w[i+3] |=
get_32());
969 return acc == not_acc;
975 for (
unsigned i = 0; i < count; i+=4)
994 const unsigned char* buf =
buf_;
1003 }
while (s < s_end);
1004 buf_ = (
unsigned char*)buf;
1010template<
typename TEncoder>
1014 accum_ |= (value << used_bits_);
1015 if (++used_bits_ == (
sizeof(accum_) * 8))
1021template<
typename TEncoder>
1024 unsigned used = used_bits_;
1025 unsigned acc = accum_;
1028 unsigned mask = ~0u;
1029 mask >>= (
sizeof(accum_) * 8) - count;
1034 unsigned free_bits = unsigned(
sizeof(accum_) * 8) - used;
1036 acc |= value << used;
1038 if (count <= free_bits)
1045 value >>= free_bits;
1052 if (used == (
sizeof(accum_) * 8))
1063template<
typename TEncoder>
1066 if (++used_bits_ == (
sizeof(accum_) * 8))
1072template<
typename TEncoder>
1075 unsigned used = used_bits_;
1076 unsigned free_bits = (
sizeof(accum_) * 8) - used;
1077 if (count >= free_bits)
1083 for ( ;count >=
sizeof(accum_) * 8; count -=
sizeof(accum_) * 8)
1093 accum_ |= (1u << used);
1094 if (++used == (
sizeof(accum_) * 8))
1102template<
typename TEncoder>
1111 unsigned used = used_bits_;
1112 unsigned acc = accum_;
1113 const unsigned acc_bits = (
sizeof(acc) * 8);
1114 unsigned free_bits = acc_bits - used;
1117 unsigned count = logv;
1118 if (count >= free_bits)
1124 for ( ;count >= acc_bits; count -= acc_bits)
1135 if (++used == acc_bits)
1145 unsigned mask = (~0u);
1146 mask >>= acc_bits - logv;
1151 acc |= value << used;
1152 free_bits = acc_bits - used;
1153 if (logv <= free_bits)
1160 value >>= free_bits;
1174template<
typename TEncoder>
1183 unsigned mid_idx = sz >> 1;
1189 unsigned r = hi - lo - sz + 1;
1192 unsigned value = val - lo - mid_idx;
1194 put_bits(value, logv+1);
1198 bic_encode_u16_rg(arr, mid_idx, lo,
gap_word_t(val-1));
1209template<
typename TEncoder>
1218 unsigned mid_idx = sz >> 1;
1224 unsigned r = hi - lo - sz + 1;
1227 unsigned value = val - lo - mid_idx;
1231 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1232 int64_t half_c = c >> 1;
1233 int64_t half_r = r >> 1;
1234 int64_t lo1 = half_r - half_c;
1235 int64_t hi1 = half_r + half_c + 1;
1237 logv += (value <= lo1 || value >= hi1);
1239 put_bits(value, logv);
1243 bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1260struct bic_encode_stack_u16
1267 unsigned stack_size_ = 0;
1277 unsigned r = hi - lo - sz + 1;
1280 unsigned s = stack_size_++;
1297template<
typename TEncoder>
1305 bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1309 BM_ASSERT(sz_i == u16_stack.stack_size_);
1310 for (
unsigned i = 0; i < sz_i; ++i)
1315 unsigned r = u16_stack.r_[i];
1317 unsigned value = val - lo - mid_idx;
1320 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1322 int64_t half_c = c >> 1;
1323 int64_t half_r = r >> 1;
1324 int64_t lo1 = half_r - half_c;
1325 int64_t hi1 = half_r + half_c + 1;
1327 logv += (value <= lo1 || value >= hi1);
1329 put_bits(value, logv);
1335template<
typename TEncoder>
1344 unsigned mid_idx = sz >> 1;
1350 unsigned r = hi - lo - sz + 1;
1353 unsigned value = val - lo - mid_idx;
1357 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1358 unsigned half_c = c >> 1;
1359 unsigned half_r = r >> 1;
1360 int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1361 unsigned hi1 = (half_r + half_c);
1362 logv += (value <= lo1 || value > hi1);
1364 put_bits(value, logv);
1387template<
class TDecoder>
1399 unsigned r = hi - lo - sz + 1;
1403 val = get_bits(logv);
1411 unsigned mid_idx = sz >> 1;
1412 val += lo + mid_idx;
1430template<
class TDecoder>
1444 unsigned r = hi - lo - sz + 1;
1449 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1450 int64_t half_c = c >> 1;
1451 int64_t half_r = r >> 1;
1452 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1453 int64_t hi1 = half_r + half_c + 1;
1454 val = get_bits(logv);
1455 if (val <= lo1 || val >= hi1)
1456 val += (get_bits(1) << logv);
1465 unsigned mid_idx = sz >> 1;
1466 val += lo + mid_idx;
1471 bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1482template<
class TDecoder>
1496 unsigned r = hi - lo - sz + 1;
1501 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1502 int64_t half_c = c >> 1;
1503 int64_t half_r = r >> 1;
1504 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1505 int64_t hi1 = half_r + half_c + 1;
1506 val = get_bits(logv);
1507 if (val <= lo1 || val >= hi1)
1508 val += (get_bits(1) << logv);
1517 unsigned mid_idx = sz >> 1;
1518 val += lo + mid_idx;
1534template<
class TDecoder>
1548 unsigned r = hi - lo - sz + 1;
1553 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1554 int64_t half_c = c >> 1;
1555 int64_t half_r = r >> 1;
1556 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1557 int64_t hi1 = half_r + half_c + 1;
1558 val = get_bits(logv);
1559 if (val <= lo1 || val >= hi1)
1560 val += (get_bits(1) << logv);
1569 unsigned mid_idx = sz >> 1;
1570 val += lo + mid_idx;
1581 bic_decode_u16_cm_bitset(block, mid_idx, lo,
bm::gap_word_t(val-1));
1591template<
class TDecoder>
1605 unsigned r = hi - lo - sz + 1;
1610 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1611 int64_t half_c = c >> 1;
1612 int64_t half_r = r >> 1;
1613 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1614 int64_t hi1 = half_r + half_c + 1;
1615 val = get_bits(logv);
1616 if (val <= lo1 || val >= hi1)
1617 val += (get_bits(1) << logv);
1626 unsigned mid_idx = sz >> 1;
1627 val += lo + mid_idx;
1643template<
class TDecoder>
1655 unsigned r = hi - lo - sz + 1;
1659 val = get_bits(logv);
1667 unsigned mid_idx = sz >> 1;
1668 val += lo + mid_idx;
1680 bic_decode_u16_rg_bitset(block, mid_idx, lo,
bm::gap_word_t(val - 1));
1690template<
class TDecoder>
1702 unsigned r = hi - lo - sz + 1;
1706 val = get_bits(logv);
1714 unsigned mid_idx = sz >> 1;
1715 val += lo + mid_idx;
1731template<
class TDecoder>
1734 unsigned acc = accum_;
1735 unsigned used = used_bits_;
1737 if (used == (
sizeof(acc) * 8))
1739 acc = src_.get_32();
1742 unsigned zero_bits = 0;
1747 zero_bits = unsigned(zero_bits +(
sizeof(acc) * 8) - used);
1749 acc = src_.get_32();
1752 unsigned first_bit_idx =
1753 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
1758 acc >>= first_bit_idx;
1759 zero_bits += first_bit_idx;
1760 used += first_bit_idx;
1766 if (used == (
sizeof(acc) * 8))
1768 acc = src_.get_32();
1780 unsigned free_bits = unsigned((
sizeof(acc) * 8) - used);
1781 if (zero_bits <= free_bits)
1791 if (used == (
sizeof(acc) * 8))
1793 acc = src_.get_32();
1801 acc = src_.get_32();
1802 used = zero_bits - free_bits;
1816template<
class TDecoder>
1820 const unsigned maskFF = ~0u;
1821 unsigned acc = accum_;
1822 unsigned used = used_bits_;
1825 unsigned free_bits = unsigned((
sizeof(acc) * 8) - used);
1826 if (count <= free_bits)
1829 value = acc & (maskFF >> (32 - count));
1834 if (used == (
sizeof(acc) * 8))
1836 acc = src_.get_32();
1841 acc = src_.get_32();
1842 used = count - free_bits;
1843 value |= ((acc & (maskFF >> (32 - used))) << free_bits);
Bit manipulation primitives (internal)
Byte based reader for un-aligned bit streaming.
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
bit_in(TDecoder &decoder) BMNOEXCEPT
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
unsigned get_bits(unsigned count) BMNOEXCEPT
read the specified number of bits (count) out of the stream
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Byte based writer for un-aligned bit streaming.
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints)
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
void put_zero_bits(unsigned count) BMNOEXCEPT
issue specified number of 0s
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
void put_bit(unsigned value) BMNOEXCEPT
issue single bit into encode bit-stream
void put_bits(unsigned value, unsigned count) BMNOEXCEPT
issue count bits out of value
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
void put_zero_bit() BMNOEXCEPT
issue 0 into output stream
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints) cm - "center-minimal".
Base class for all decoding functionality.
const unsigned char * start_
decoder_base(const unsigned char *buf) BMNOEXCEPT
const unsigned char * buf_
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
void seek(int delta) BMNOEXCEPT
change current position
size_t size() const BMNOEXCEPT
Returns size of the current decoding stream.
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Class for decoding data from memory buffer.
decoder_little_endian(const unsigned char *buf)
bool get_32_OR(bm::word_t *w, unsigned count)
void get_32_AND(bm::word_t *w, unsigned count)
Class for decoding data from memory buffer.
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
bm::word_t get_24() BMNOEXCEPT
Reads 24-bit word from the decoding buffer.
bool get_32_OR(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
void get_32_AND(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
decoder(const unsigned char *buf) BMNOEXCEPT
Construction.
bm::id64_t get_48() BMNOEXCEPT
Reads 48-bit word from the decoding buffer.
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
void put_48(bm::id64_t w) BMNOEXCEPT
Puts 48 bits word into encoding buffer.
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
encoder(unsigned char *buf, size_t size) BMNOEXCEPT
Construction.
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) BMNOEXCEPT
Encode 8-bit prefix + an array.
unsigned char * position_type
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
void put_24(bm::word_t w) BMNOEXCEPT
Puts 24 bits word into encoding buffer.
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
void stop()
Stop decoding sequence.
void start()
Start decoding sequence.
T operator()(void)
Decode word.
gamma_decoder(TBitIO &bin)
Functor for Elias Gamma encoding.
gamma_encoder(TBitIO &bout)
void operator()(T value)
Encode word.
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
OR array elements against another array (unaligned) dst |= *src.
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
decoder decoder_big_endian
Class for decoding data from memory buffer.
unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
const unsigned set_word_shift
unsigned long long int id64_t
unsigned short gap_word_t
T bit_scan_fwd(T v) BMNOEXCEPT
const unsigned set_word_mask
Structure keeps all-left/right ON bits masks.