BitMagic-C++
encoding.h
Go to the documentation of this file.
1#ifndef BMENCODING_H__INCLUDED__
2#define BMENCODING_H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file encoding.h
22 \brief Encoding utilities for serialization (internal)
23*/
24
25
26#include <memory.h>
27#include "bmutil.h"
28
29
30#ifdef _MSVC_VER
31#pragma warning (push)
32#pragma warning (disable : 4702)
33#endif
34
35namespace bm
36{
37
38
39// ----------------------------------------------------------------
40
41/*!
42 \brief Memory encoding.
43
44 Class for encoding data into memory.
45 Class handles aligment issues with the integer data types.
46
47 \ingroup gammacode
48*/
50{
51public:
52 typedef unsigned char* position_type;
53public:
54 encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
55 void put_8(unsigned char c) BMNOEXCEPT;
57 void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
60 void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
63 void put_prefixed_array_32(unsigned char c,
64 const bm::word_t* w, unsigned count) BMNOEXCEPT;
65 void put_prefixed_array_16(unsigned char c,
66 const bm::short_t* s, unsigned count,
67 bool encode_count) BMNOEXCEPT;
68 void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
69 size_t size() const BMNOEXCEPT;
70 unsigned char* get_pos() const BMNOEXCEPT;
71 void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
72private:
73 unsigned char* buf_;
74 unsigned char* start_;
75 size_t size_;
76};
77
78// ----------------------------------------------------------------
79/**
80 Base class for all decoding functionality
81 \ingroup gammacode
82*/
84{
85public:
86 decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
87
88 /// Reads character from the decoding buffer.
89 unsigned char get_8() BMNOEXCEPT { return *buf_++; }
90
91 /// Returns size of the current decoding stream.
92 size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
93
94 /// change current position
95 void seek(int delta) BMNOEXCEPT { buf_ += delta; }
96
97 /// read bytes from the decode buffer
98 void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
99
100 /// Return current buffer pointer
101 const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
102
103 /// Set current buffer pointer
104 void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
105protected:
106 const unsigned char* buf_;
107 const unsigned char* start_;
108};
109
110
111// ----------------------------------------------------------------
112/**
113 Class for decoding data from memory buffer.
114 Properly handles aligment issues with integer data types.
115 \ingroup gammacode
116*/
117class decoder : public decoder_base
118{
119public:
120 decoder(const unsigned char* buf) BMNOEXCEPT;
121 bm::short_t get_16() BMNOEXCEPT;
122 bm::word_t get_24() BMNOEXCEPT;
123 bm::word_t get_32() BMNOEXCEPT;
124 bm::id64_t get_48() BMNOEXCEPT;
125 bm::id64_t get_64() BMNOEXCEPT;
126 void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
127 bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
128 void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
129 void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
130};
131
132// ----------------------------------------------------------------
133/**
134 Class for decoding data from memory buffer.
135 Properly handles aligment issues with integer data types.
136 Converts data to big endian architecture
137 (presumed it was encoded as little endian)
138 \ingroup gammacode
139*/
141
142
143// ----------------------------------------------------------------
144/**
145 Class for decoding data from memory buffer.
146 Properly handles aligment issues with integer data types.
147 Converts data to little endian architecture
148 (presumed it was encoded as big endian)
149 \ingroup gammacode
150*/
152{
153public:
154 decoder_little_endian(const unsigned char* buf);
155 bm::short_t get_16();
156 bm::word_t get_24();
157 bm::word_t get_32();
158 bm::id64_t get_48();
159 bm::id64_t get_64();
160 void get_32(bm::word_t* w, unsigned count);
161 bool get_32_OR(bm::word_t* w, unsigned count);
162 void get_32_AND(bm::word_t* w, unsigned count);
163 void get_16(bm::short_t* s, unsigned count);
164};
165
166
167/**
168 Byte based writer for un-aligned bit streaming
169
170 @ingroup gammacode
171 @sa encoder
172*/
173template<class TEncoder>
175{
176public:
177 bit_out(TEncoder& dest)
178 : dest_(dest), used_bits_(0), accum_(0)
179 {}
180
181 ~bit_out() { flush(); }
182
183 /// issue single bit into encode bit-stream
184 void put_bit(unsigned value) BMNOEXCEPT;
185
186 /// issue count bits out of value
187 void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
188
189 /// issue 0 into output stream
190 void put_zero_bit() BMNOEXCEPT;
191
192 /// issue specified number of 0s
193 void put_zero_bits(unsigned count) BMNOEXCEPT;
194
195 /// Elias Gamma encode the specified value
196 void gamma(unsigned value) BMNOEXCEPT;
197
198 /// Binary Interpolative array decode
199 void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
201 {
202 bic_encode_u16_cm(arr, sz, lo, hi);
203 }
204
205 /// Binary Interpolative encoding (array of 16-bit ints)
206 void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
209
210 /// Binary Interpolative encoding (array of 16-bit ints)
211 /// cm - "center-minimal"
212 void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
215
216 /// Binary Interpolative encoding (array of 32-bit ints)
217 /// cm - "center-minimal"
218 void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
220
221 /// Flush the incomplete 32-bit accumulator word
222 void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
223
224private:
225 void flush_accum() BMNOEXCEPT
226 {
227 dest_.put_32(accum_);
228 used_bits_ = accum_ = 0;
229 }
230private:
231 bit_out(const bit_out&);
232 bit_out& operator=(const bit_out&);
233
234private:
235 TEncoder& dest_; ///< Bit stream target
236 unsigned used_bits_; ///< Bits used in the accumulator
237 unsigned accum_; ///< write bit accumulator
238};
239
240
241/**
242 Byte based reader for un-aligned bit streaming
243
244 @ingroup gammacode
245 @sa encoder
246*/
247template<class TDecoder>
249{
250public:
252 : src_(decoder),
253 used_bits_(unsigned(sizeof(accum_) * 8)),
254 accum_(0)
255 {}
256
257 /// decode unsigned value using Elias Gamma coding
258 unsigned gamma() BMNOEXCEPT;
259
260 /// read number of bits out of the stream
261 unsigned get_bits(unsigned count) BMNOEXCEPT;
262
263 /// Binary Interpolative array decode
264 void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
266 {
267 bic_decode_u16_cm(arr, sz, lo, hi);
268 }
269
270 void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
272 {
273 bic_decode_u16_cm_bitset(block, sz, lo, hi);
274 }
275 void bic_decode_u16_dry(unsigned sz,
277 {
278 bic_decode_u16_cm_dry(sz, lo, hi);
279 }
280
281
282 /// Binary Interpolative array decode
283 void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
285 /// Binary Interpolative array decode
286 void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
288
289 /// Binary Interpolative array decode (32-bit)
290 void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
292
293
294 /// Binary Interpolative array decode into bitset (32-bit based)
295 void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
297
298 /// Binary Interpolative array decode into /dev/null
299 void bic_decode_u16_rg_dry(unsigned sz,
301
302 /// Binary Interpolative array decode into bitset (32-bit based)
303 void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
306
307 /// Binary Interpolative array decode into /dev/null
308 void bic_decode_u16_cm_dry(unsigned sz,
310
311private:
312 bit_in(const bit_in&);
313 bit_in& operator=(const bit_in&);
314private:
315 TDecoder& src_; ///< Source of bytes
316 unsigned used_bits_; ///< Bits used in the accumulator
317 unsigned accum_; ///< read bit accumulator
318};
319
320
321/**
322 Functor for Elias Gamma encoding
323 @ingroup gammacode
324*/
325template<typename T, typename TBitIO>
327{
328public:
329 gamma_encoder(TBitIO& bout) : bout_(bout)
330 {}
331
332 /** Encode word */
333 void operator()(T value) { bout_.gamma(value); }
334private:
336 gamma_encoder& operator=(const gamma_encoder&);
337private:
338 TBitIO& bout_;
339};
340
341
342/**
343 Elias Gamma decoder
344 @ingroup gammacode
345*/
346template<typename T, typename TBitIO>
347class gamma_decoder
348{
349public:
350 gamma_decoder(TBitIO& bin) : bin_(bin) {}
351
352 /**
353 Start encoding sequence
354 */
355 void start() {}
356
357 /**
358 Stop decoding sequence
359 */
360 void stop() {}
361
362 /**
363 Decode word
364 */
365 T operator()(void) { return (T)bin_.gamma(); }
366private:
368 gamma_decoder& operator=(const gamma_decoder&);
369private:
370 TBitIO& bin_;
371};
372
373
374// ----------------------------------------------------------------
375// Implementation details.
376// ----------------------------------------------------------------
377
378/*!
379 \fn encoder::encoder(unsigned char* buf, unsigned size)
380 \brief Construction.
381 \param buf - memory buffer pointer.
382 \param size - size of the buffer
383*/
384inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
385: buf_(buf), start_(buf)
386{
387 size_ = a_size;
388}
389/*!
390 \brief Encode 8-bit prefix + an array
391*/
392inline void encoder::put_prefixed_array_32(unsigned char c,
393 const bm::word_t* w,
394 unsigned count) BMNOEXCEPT
395{
396 put_8(c);
397 put_32(w, count);
398}
399
400/*!
401 \brief Encode 8-bit prefix + an array
402*/
403inline void encoder::put_prefixed_array_16(unsigned char c,
404 const bm::short_t* s,
405 unsigned count,
406 bool encode_count) BMNOEXCEPT
407{
408 put_8(c);
409 if (encode_count)
410 put_16((bm::short_t) count);
411 put_16(s, count);
412}
413
414
415/*!
416 \fn void encoder::put_8(unsigned char c)
417 \brief Puts one character into the encoding buffer.
418 \param c - character to encode
419*/
421{
422 *buf_++ = c;
423}
424
425/*!
426 \fn encoder::put_16(bm::short_t s)
427 \brief Puts short word (16 bits) into the encoding buffer.
428 \param s - short word to encode
429*/
431{
432#if (BM_UNALIGNED_ACCESS_OK == 1)
433 ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
434 buf_ += sizeof(bm::short_t);
435#else
436 *buf_++ = (unsigned char) s;
437 s >>= 8;
438 *buf_++ = (unsigned char) s;
439#endif
440}
441
442/*!
443 \brief Method puts array of short words (16 bits) into the encoding buffer.
444*/
445inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
446{
447#if (BM_UNALIGNED_ACCESS_OK == 1)
448 ::memcpy(buf_, s, sizeof(bm::short_t)*count);
449 buf_ += sizeof(bm::short_t) * count;
450#else
451 unsigned char* buf = buf_;
452 const bm::short_t* s_end = s + count;
453 do
454 {
455 bm::short_t w16 = *s++;
456 unsigned char a = (unsigned char) w16;
457 unsigned char b = (unsigned char) (w16 >> 8);
458
459 *buf++ = a;
460 *buf++ = b;
461
462 } while (s < s_end);
463
464 buf_ = (unsigned char*)buf;
465#endif
466}
467
468/*!
469 \brief copy bytes into target buffer or just rewind if src is NULL
470*/
471inline
472void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
473{
474 BM_ASSERT((buf_ + count) < (start_ + size_));
475 if (src)
476 ::memcpy(buf_, src, count);
477 buf_ += count;
478}
479
480
481/*!
482 \fn unsigned encoder::size() const
483 \brief Returns size of the current encoding stream.
484*/
485inline size_t encoder::size() const BMNOEXCEPT
486{
487 return size_t(buf_ - start_);
488}
489
490/**
491 \brief Get current memory stream position
492*/
494{
495 return buf_;
496}
497
498/**
499 \brief Set current memory stream position
500*/
502{
503 buf_ = buf_pos;
504}
505
506/*!
507 \fn void encoder::put_24(bm::word_t w)
508 \brief Puts 24 bits word into encoding buffer.
509 \param w - word to encode.
510*/
512{
513 BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
514
515 buf_[0] = (unsigned char)w;
516 buf_[1] = (unsigned char)(w >> 8);
517 buf_[2] = (unsigned char)(w >> 16);
518 buf_ += 3;
519}
520
521
522/*!
523 \fn void encoder::put_32(bm::word_t w)
524 \brief Puts 32 bits word into encoding buffer.
525 \param w - word to encode.
526*/
528{
529#if (BM_UNALIGNED_ACCESS_OK == 1)
530 ::memcpy(buf_, &w, sizeof(bm::word_t));
531 buf_ += sizeof(bm::word_t);
532#else
533 *buf_++ = (unsigned char) w;
534 *buf_++ = (unsigned char) (w >> 8);
535 *buf_++ = (unsigned char) (w >> 16);
536 *buf_++ = (unsigned char) (w >> 24);
537#endif
538}
539
540/*!
541 \fn void encoder::put_48(bm::id64_t w)
542 \brief Puts 48 bits word into encoding buffer.
543 \param w - word to encode.
544*/
546{
547 BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
548 *buf_++ = (unsigned char)w;
549 *buf_++ = (unsigned char)(w >> 8);
550 *buf_++ = (unsigned char)(w >> 16);
551 *buf_++ = (unsigned char)(w >> 24);
552 *buf_++ = (unsigned char)(w >> 32);
553 *buf_++ = (unsigned char)(w >> 40);
554}
555
556
557/*!
558 \fn void encoder::put_64(bm::id64_t w)
559 \brief Puts 64 bits word into encoding buffer.
560 \param w - word to encode.
561*/
563{
564#if (BM_UNALIGNED_ACCESS_OK == 1)
565 ::memcpy(buf_, &w, sizeof(bm::id64_t));
566 buf_ += sizeof(bm::id64_t);
567#else
568 *buf_++ = (unsigned char) w;
569 *buf_++ = (unsigned char) (w >> 8);
570 *buf_++ = (unsigned char) (w >> 16);
571 *buf_++ = (unsigned char) (w >> 24);
572 *buf_++ = (unsigned char) (w >> 32);
573 *buf_++ = (unsigned char) (w >> 40);
574 *buf_++ = (unsigned char) (w >> 48);
575 *buf_++ = (unsigned char) (w >> 56);
576#endif
577}
578
579
580/*!
581 \brief Encodes array of 32-bit words
582*/
583inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
584{
585#if (BM_UNALIGNED_ACCESS_OK == 1)
586 // use memcpy() because compilers now understand it as an idiom and inline
587 ::memcpy(buf_, w, sizeof(bm::word_t) * count);
588 buf_ += sizeof(bm::word_t) * count;
589#else
590 unsigned char* buf = buf_;
591 const bm::word_t* w_end = w + count;
592 do
593 {
594 bm::word_t w32 = *w++;
595 unsigned char a = (unsigned char) w32;
596 unsigned char b = (unsigned char) (w32 >> 8);
597 unsigned char c = (unsigned char) (w32 >> 16);
598 unsigned char d = (unsigned char) (w32 >> 24);
599
600 *buf++ = a;
601 *buf++ = b;
602 *buf++ = c;
603 *buf++ = d;
604 } while (w < w_end);
605
606 buf_ = (unsigned char*)buf;
607#endif
608}
609
610
611// ---------------------------------------------------------------------
612
613
614/*!
615 Load bytes from the decode buffer
616*/
617inline
618void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
619{
620 if (dst)
621 ::memcpy(dst, buf_, count);
622 buf_ += count;
623}
624
625/*!
626 \fn decoder::decoder(const unsigned char* buf)
627 \brief Construction
628 \param buf - pointer to the decoding memory.
629*/
630inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
631: decoder_base(buf)
632{
633}
634
635/*!
636 \fn bm::short_t decoder::get_16()
637 \brief Reads 16-bit word from the decoding buffer.
638*/
640{
641#if (BM_UNALIGNED_ACCESS_OK == 1)
642 bm::short_t a;
643 ::memcpy(&a, buf_, sizeof(bm::short_t));
644#else
645 bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
646#endif
647 buf_ += sizeof(a);
648 return a;
649}
650
651/*!
652 \fn bm::word_t decoder::get_24()
653 \brief Reads 32-bit word from the decoding buffer.
654*/
656{
657 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
658 ((unsigned)buf_[2] << 16);
659 buf_ += 3;
660 return a;
661}
662
663
664/*!
665 \fn bm::word_t decoder::get_32()
666 \brief Reads 32-bit word from the decoding buffer.
667*/
669{
670#if (BM_UNALIGNED_ACCESS_OK == 1)
671 bm::word_t a;
672 ::memcpy(&a, buf_, sizeof(bm::word_t));
673#else
674 bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
675 ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
676#endif
677 buf_+=sizeof(a);
678 return a;
679}
680
681/*!
682 \fn bm::word_t decoder::get_48()
683 \brief Reads 64-bit word from the decoding buffer.
684*/
685inline
687{
688 bm::id64_t a = buf_[0] +
689 ((bm::id64_t)buf_[1] << 8) +
690 ((bm::id64_t)buf_[2] << 16) +
691 ((bm::id64_t)buf_[3] << 24) +
692 ((bm::id64_t)buf_[4] << 32) +
693 ((bm::id64_t)buf_[5] << 40);
694 buf_ += 6;
695 return a;
696}
697
698/*!
699 \fn bm::word_t decoder::get_64()
700 \brief Reads 64-bit word from the decoding buffer.
701*/
702inline
704{
705#if (BM_UNALIGNED_ACCESS_OK == 1)
706 bm::id64_t a;
707 ::memcpy(&a, buf_, sizeof(bm::id64_t));
708#else
709 bm::id64_t a = buf_[0]+
710 ((bm::id64_t)buf_[1] << 8) +
711 ((bm::id64_t)buf_[2] << 16) +
712 ((bm::id64_t)buf_[3] << 24) +
713 ((bm::id64_t)buf_[4] << 32) +
714 ((bm::id64_t)buf_[5] << 40) +
715 ((bm::id64_t)buf_[6] << 48) +
716 ((bm::id64_t)buf_[7] << 56);
717#endif
718 buf_ += sizeof(a);
719 return a;
720}
721
722
723/*!
724 \fn void decoder::get_32(bm::word_t* w, unsigned count)
725 \brief Reads block of 32-bit words from the decoding buffer.
726 \param w - pointer on memory block to read into.
727 \param count - size of memory block in words.
728*/
729inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
730{
731 if (!w)
732 {
733 seek(int(count * sizeof(bm::word_t)));
734 return;
735 }
736#if (BM_UNALIGNED_ACCESS_OK == 1)
737 ::memcpy(w, buf_, count * sizeof(bm::word_t));
738 seek(int(count * sizeof(bm::word_t)));
739 return;
740#else
741 const unsigned char* buf = buf_;
742 const bm::word_t* w_end = w + count;
743 do
744 {
745 bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
746 ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
747 *w++ = a;
748 buf += sizeof(a);
749 } while (w < w_end);
750 buf_ = (unsigned char*)buf;
751#endif
752}
753
754/*!
755 \brief Reads block of 32-bit words from the decoding buffer and ORs
756 to the destination
757 \param w - pointer on memory block to read into
758 \param count - should match bm::set_block_size
759*/
760inline
762{
763 if (!w)
764 {
765 seek(int(count * sizeof(bm::word_t)));
766 return false;
767 }
768#if defined(BMAVX2OPT)
769 __m256i* buf_start = (__m256i*)buf_;
770 seek(int(count * sizeof(bm::word_t)));
771 __m256i* buf_end = (__m256i*)buf_;
772
773 return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
774#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
775 __m128i* buf_start = (__m128i*)buf_;
776 seek(int(count * sizeof(bm::word_t)));
777 __m128i* buf_end = (__m128i*)buf_;
778
779 return bm::sse2_or_arr_unal((__m128i*)w, buf_start, buf_end);
780#else
781 bm::word_t acc = 0;
782 const bm::word_t not_acc = acc = ~acc;
783
784 for (unsigned i = 0; i < count; i+=4)
785 {
786 acc &= (w[i+0] |= get_32());
787 acc &= (w[i+1] |= get_32());
788 acc &= (w[i+2] |= get_32());
789 acc &= (w[i+3] |= get_32());
790 }
791 return acc == not_acc;
792#endif
793}
794
795/*!
796 \brief Reads block of 32-bit words from the decoding buffer and ANDs
797 to the destination
798 \param w - pointer on memory block to read into
799 \param count - should match bm::set_block_size
800*/
801inline
803{
804 if (!w)
805 {
806 seek(int(count * sizeof(bm::word_t)));
807 return;
808 }
809#if defined(BMAVX2OPT)
810 __m256i* buf_start = (__m256i*)buf_;
811 seek(int(count * sizeof(bm::word_t)));
812 __m256i* buf_end = (__m256i*)buf_;
813
814 bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
815#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
816 __m128i* buf_start = (__m128i*)buf_;
817 seek(int(count * sizeof(bm::word_t)));
818 __m128i* buf_end = (__m128i*)buf_;
819
820 bm::sse2_and_arr_unal((__m128i*)w, buf_start, buf_end);
821#else
822 for (unsigned i = 0; i < count; i+=4)
823 {
824 w[i+0] &= get_32();
825 w[i+1] &= get_32();
826 w[i+2] &= get_32();
827 w[i+3] &= get_32();
828 }
829#endif
830}
831
832
833
834/*!
835 \fn void decoder::get_16(bm::short_t* s, unsigned count)
836 \brief Reads block of 32-bit words from the decoding buffer.
837 \param s - pointer on memory block to read into.
838 \param count - size of memory block in words.
839*/
840inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
841{
842 if (!s)
843 {
844 seek(int(count * sizeof(bm::short_t)));
845 return;
846 }
847#if (BM_UNALIGNED_ACCESS_OK == 1)
848 ::memcpy(s, buf_, sizeof(bm::short_t) * count);
849 buf_ += sizeof(bm::short_t) * count;
850#else
851 const unsigned char* buf = buf_;
852 const bm::short_t* s_end = s + count;
853 do
854 {
855 bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
856 *s++ = a;
857 buf += sizeof(a);
858 } while (s < s_end);
859 buf_ = (unsigned char*)buf;
860#endif
861
862}
863
864
865
866// ---------------------------------------------------------------------
867
868inline decoder_little_endian::decoder_little_endian(const unsigned char* buf)
869: decoder_base(buf)
870{
871}
872
873inline
875{
878 bm::short_t a = bm::short_t((v1 << 8) + v2);
879 buf_ += sizeof(a);
880 return a;
881}
882
884{
885 // TODO: validate if this is a correct for cross endian opts
886 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
887 ((unsigned)buf_[2] << 16);
888 buf_ += 3;
889 return a;
890}
891
892inline
894{
895 bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
896 ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
897 buf_+=sizeof(a);
898 return a;
899}
900
901inline
903{
904 bm::id64_t a = buf_[0] +
905 ((bm::id64_t)buf_[1] << 8) +
906 ((bm::id64_t)buf_[2] << 16) +
907 ((bm::id64_t)buf_[3] << 24) +
908 ((bm::id64_t)buf_[4] << 32) +
909 ((bm::id64_t)buf_[5] << 40);
910 buf_ += 6;
911 return a;
912}
913
914inline
916{
917 bm::id64_t a = buf_[0]+
918 ((bm::id64_t)buf_[1] << 56) +
919 ((bm::id64_t)buf_[2] << 48) +
920 ((bm::id64_t)buf_[3] << 40) +
921 ((bm::id64_t)buf_[4] << 32) +
922 ((bm::id64_t)buf_[5] << 24) +
923 ((bm::id64_t)buf_[6] << 16) +
924 ((bm::id64_t)buf_[7] << 8);
925 buf_+=sizeof(a);
926 return a;
927}
928
929inline
931{
932 if (!w)
933 {
934 seek(int(count * sizeof(bm::word_t)));
935 return;
936 }
937
938 const unsigned char* buf = buf_;
939 const bm::word_t* w_end = w + count;
940 do
941 {
942 bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
943 ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
944 *w++ = a;
945 buf += sizeof(a);
946 } while (w < w_end);
947 buf_ = (unsigned char*)buf;
948}
949
950inline
952{
953 if (!w)
954 {
955 seek(int(count * sizeof(bm::word_t)));
956 return false;
957 }
958
959 bm::word_t acc = 0;
960 const bm::word_t not_acc = acc = ~acc;
961
962 for (unsigned i = 0; i < count; i+=4)
963 {
964 acc &= (w[i+0] |= get_32());
965 acc &= (w[i+1] |= get_32());
966 acc &= (w[i+2] |= get_32());
967 acc &= (w[i+3] |= get_32());
968 }
969 return acc == not_acc;
970}
971
972inline
974{
975 for (unsigned i = 0; i < count; i+=4)
976 {
977 w[i+0] &= get_32();
978 w[i+1] &= get_32();
979 w[i+2] &= get_32();
980 w[i+3] &= get_32();
981 }
982}
983
984
985inline
987{
988 if (!s)
989 {
990 seek(int(count * sizeof(bm::short_t)));
991 return;
992 }
993
994 const unsigned char* buf = buf_;
995 const bm::short_t* s_end = s + count;
996 do
997 {
1000 bm::short_t a = bm::short_t((v1 << 8) + v2);
1001 *s++ = a;
1002 buf += sizeof(a);
1003 } while (s < s_end);
1004 buf_ = (unsigned char*)buf;
1005}
1006
1007// ----------------------------------------------------------------------
1008//
1009
1010template<typename TEncoder>
1012{
1013 BM_ASSERT(value <= 1);
1014 accum_ |= (value << used_bits_);
1015 if (++used_bits_ == (sizeof(accum_) * 8))
1016 flush_accum();
1017}
1018
1019// ----------------------------------------------------------------------
1020
1021template<typename TEncoder>
1022void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
1023{
1024 unsigned used = used_bits_;
1025 unsigned acc = accum_;
1026
1027 {
1028 unsigned mask = ~0u;
1029 mask >>= (sizeof(accum_) * 8) - count;
1030 value &= mask;
1031 }
1032 for (;count;)
1033 {
1034 unsigned free_bits = unsigned(sizeof(accum_) * 8) - used;
1035 BM_ASSERT(free_bits);
1036 acc |= value << used;
1037
1038 if (count <= free_bits)
1039 {
1040 used += count;
1041 break;
1042 }
1043 else
1044 {
1045 value >>= free_bits;
1046 count -= free_bits;
1047 dest_.put_32(acc);
1048 acc = used = 0;
1049 continue;
1050 }
1051 }
1052 if (used == (sizeof(accum_) * 8))
1053 {
1054 dest_.put_32(acc);
1055 acc = used = 0;
1056 }
1057 used_bits_ = used;
1058 accum_ = acc;
1059}
1060
1061// ----------------------------------------------------------------------
1062
1063template<typename TEncoder>
1065{
1066 if (++used_bits_ == (sizeof(accum_) * 8))
1067 flush_accum();
1068}
1069
1070// ----------------------------------------------------------------------
1071
1072template<typename TEncoder>
1074{
1075 unsigned used = used_bits_;
1076 unsigned free_bits = (sizeof(accum_) * 8) - used;
1077 if (count >= free_bits)
1078 {
1079 flush_accum();
1080 count -= free_bits;
1081 used = 0;
1082
1083 for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
1084 {
1085 dest_.put_32(0);
1086 }
1087 used += count;
1088 }
1089 else
1090 {
1091 used += count;
1092 }
1093 accum_ |= (1u << used);
1094 if (++used == (sizeof(accum_) * 8))
1095 flush_accum();
1096 else
1097 used_bits_ = used;
1098}
1099
1100// ----------------------------------------------------------------------
1101
1102template<typename TEncoder>
1104{
1105 BM_ASSERT(value);
1106
1107 unsigned logv = bm::bit_scan_reverse32(value);
1108
1109 // Put zeroes + 1 bit
1110
1111 unsigned used = used_bits_;
1112 unsigned acc = accum_;
1113 const unsigned acc_bits = (sizeof(acc) * 8);
1114 unsigned free_bits = acc_bits - used;
1115
1116 {
1117 unsigned count = logv;
1118 if (count >= free_bits)
1119 {
1120 dest_.put_32(acc);
1121 acc = used ^= used;
1122 count -= free_bits;
1123
1124 for ( ;count >= acc_bits; count -= acc_bits)
1125 {
1126 dest_.put_32(0);
1127 }
1128 used += count;
1129 }
1130 else
1131 {
1132 used += count;
1133 }
1134 acc |= (1 << used);
1135 if (++used == acc_bits)
1136 {
1137 dest_.put_32(acc);
1138 acc = used ^= used;
1139 }
1140 }
1141
1142 // Put the value bits
1143 //
1144 {
1145 unsigned mask = (~0u);
1146 mask >>= acc_bits - logv;
1147 value &= mask;
1148 }
1149 for (;logv;)
1150 {
1151 acc |= value << used;
1152 free_bits = acc_bits - used;
1153 if (logv <= free_bits)
1154 {
1155 used += logv;
1156 break;
1157 }
1158 else
1159 {
1160 value >>= free_bits;
1161 logv -= free_bits;
1162 dest_.put_32(acc);
1163 acc = used ^= used;
1164 continue;
1165 }
1166 } // for
1167
1168 used_bits_ = used;
1169 accum_ = acc;
1170}
1171
1172// ----------------------------------------------------------------------
1173
1174template<typename TEncoder>
1176 const bm::gap_word_t* arr,
1177 unsigned sz,
1179{
1180 for (;sz;)
1181 {
1182 BM_ASSERT(lo <= hi);
1183 unsigned mid_idx = sz >> 1;
1184 bm::gap_word_t val = arr[mid_idx];
1185
1186 // write the interpolated value
1187 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1188 {
1189 unsigned r = hi - lo - sz + 1;
1190 if (r)
1191 {
1192 unsigned value = val - lo - mid_idx;
1193 unsigned logv = bm::bit_scan_reverse32(r);
1194 put_bits(value, logv+1);
1195 }
1196 }
1197
1198 bic_encode_u16_rg(arr, mid_idx, lo, gap_word_t(val-1));
1199 // tail recursion
1200 // bic_encode_u16(arr + mid_idx + 1, sz - mid_idx - 1, gap_word_t(val+1), hi);
1201 arr += mid_idx + 1;
1202 sz -= mid_idx + 1;
1203 lo = gap_word_t(val + 1);
1204 } // for sz
1205}
1206
1207// ----------------------------------------------------------------------
1208
1209template<typename TEncoder>
1211 unsigned sz,
1212 bm::word_t lo,
1214{
1215 for (;sz;)
1216 {
1217 BM_ASSERT(lo <= hi);
1218 unsigned mid_idx = sz >> 1;
1219 bm::word_t val = arr[mid_idx];
1220
1221 // write the interpolated value
1222 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1223 {
1224 unsigned r = hi - lo - sz + 1;
1225 if (r)
1226 {
1227 unsigned value = val - lo - mid_idx;
1228
1229 unsigned n = r + 1;
1230 unsigned logv = bm::bit_scan_reverse32(n);
1231 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1232 int64_t half_c = c >> 1; // c / 2;
1233 int64_t half_r = r >> 1; // r / 2;
1234 int64_t lo1 = half_r - half_c;
1235 int64_t hi1 = half_r + half_c + 1;
1236 lo1 -= (n & 1);
1237 logv += (value <= lo1 || value >= hi1);
1238
1239 put_bits(value, logv);
1240 }
1241 }
1242
1243 bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1244 // tail recursive call:
1245 // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1246 arr += mid_idx + 1;
1247 sz -= mid_idx + 1;
1248 lo = val + 1;
1249 } // for sz
1250}
1251
1252// ----------------------------------------------------------------------
1253
1254#if 0
1255/**
1256 Shuffle structure for BIC
1257 @internal
1258*/
1259template<unsigned N>
1260struct bic_encode_stack_u16
1261{
1262 bm::gap_word_t val_[N];
1263 bm::gap_word_t mid_[N];
1264 bm::gap_word_t lo_[N];
1265 bm::gap_word_t r_[N];
1266
1267 unsigned stack_size_ = 0;
1268
1269 void bic_shuffle(const bm::gap_word_t* arr,
1271 {
1272 for (;sz;)
1273 {
1274 BM_ASSERT(lo <= hi);
1275 bm::gap_word_t mid_idx = sz >> 1;
1276 bm::gap_word_t val = arr[mid_idx];
1277 unsigned r = hi - lo - sz + 1;
1278 if (r)
1279 {
1280 unsigned s = stack_size_++;
1281 r_[s] = (bm::gap_word_t)r;
1282 val_[s] = val;
1283 mid_[s] = mid_idx;
1284 lo_[s] = lo;
1285 }
1286
1287 bic_shuffle(arr, mid_idx, lo, bm::gap_word_t(val-1));
1288 // tail recursive call:
1289 // bic_shuffle(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1290 arr += mid_idx + 1;
1291 sz -= mid_idx + 1;
1292 lo = bm::gap_word_t(val + 1);
1293 } // for sz
1294 }
1295};
1296
1297template<typename TEncoder>
1299 unsigned sz_i,
1300 bm::gap_word_t lo_i,
1302{
1303 BM_ASSERT(sz_i <= 65535);
1304
1305 bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1306 // BIC re-ordering
1307 u16_stack.bic_shuffle(arr, bm::gap_word_t(sz_i), lo_i, hi_i);
1308
1309 BM_ASSERT(sz_i == u16_stack.stack_size_);
1310 for (unsigned i = 0; i < sz_i; ++i)
1311 {
1312 bm::gap_word_t val = u16_stack.val_[i];
1313 bm::gap_word_t mid_idx = u16_stack.mid_[i];
1314 bm::gap_word_t lo = u16_stack.lo_[i];
1315 unsigned r = u16_stack.r_[i];
1316
1317 unsigned value = val - lo - mid_idx;
1318 unsigned n = r + 1;
1319 unsigned logv = bm::bit_scan_reverse32(n);
1320 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1321
1322 int64_t half_c = c >> 1; // c / 2;
1323 int64_t half_r = r >> 1; // r / 2;
1324 int64_t lo1 = half_r - half_c;
1325 int64_t hi1 = half_r + half_c + 1;
1326 lo1 -= (n & 1);
1327 logv += (value <= lo1 || value >= hi1);
1328
1329 put_bits(value, logv);
1330 } // for i
1331}
1332#endif
1333
1334
1335template<typename TEncoder>
1337 unsigned sz,
1338 bm::gap_word_t lo,
1340{
1341 for (;sz;)
1342 {
1343 BM_ASSERT(lo <= hi);
1344 unsigned mid_idx = sz >> 1;
1345 bm::gap_word_t val = arr[mid_idx];
1346
1347 // write the interpolated value
1348 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1349 {
1350 unsigned r = hi - lo - sz + 1;
1351 if (r)
1352 {
1353 unsigned value = val - lo - mid_idx;
1354
1355 unsigned n = r + 1;
1356 unsigned logv = bm::bit_scan_reverse32(n);
1357 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1358 unsigned half_c = c >> 1; // c / 2;
1359 unsigned half_r = r >> 1; // r / 2;
1360 int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1361 unsigned hi1 = (half_r + half_c);
1362 logv += (value <= lo1 || value > hi1);
1363
1364 put_bits(value, logv);
1365 }
1366 }
1367
1368 bic_encode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1369 // tail recursive call:
1370 // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1371 arr += ++mid_idx;
1372 sz -= mid_idx;
1373 lo = bm::gap_word_t(val + 1);
1374 } // for sz
1375}
1376
1377
1378
1379
1380
1381
1382// ----------------------------------------------------------------------
1383//
1384// ----------------------------------------------------------------------
1385
1386
1387template<class TDecoder>
1389 bm::gap_word_t lo,
1391{
1392 for (;sz;)
1393 {
1394 BM_ASSERT(lo <= hi);
1395
1396 unsigned val;
1397 // read the value
1398 {
1399 unsigned r = hi - lo - sz + 1;
1400 if (r)
1401 {
1402 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1403 val = get_bits(logv);
1404 BM_ASSERT(val <= r);
1405 }
1406 else
1407 {
1408 val = 0;
1409 }
1410 }
1411 unsigned mid_idx = sz >> 1;
1412 val += lo + mid_idx;
1413
1414 BM_ASSERT(val < 65536);
1415 BM_ASSERT(mid_idx < 65536);
1416
1417 arr[mid_idx] = bm::gap_word_t(val);
1418 if (sz == 1)
1419 return;
1420 bic_decode_u16_rg(arr, mid_idx, lo, bm::gap_word_t(val - 1));
1421 //bic_decode_u16(arr + mid_idx + 1, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1422 arr += mid_idx + 1;
1423 sz -= mid_idx + 1;
1424 lo = bm::gap_word_t(val + 1);
1425 } // for sz
1426}
1427
1428// ----------------------------------------------------------------------
1429
1430template<class TDecoder>
1432 bm::word_t lo,
1434{
1435 for (;sz;)
1436 {
1437 BM_ASSERT(lo <= hi);
1438
1439 unsigned val;
1440
1441 // read the interpolated value
1442 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1443 {
1444 unsigned r = hi - lo - sz + 1;
1445 if (r)
1446 {
1447 unsigned logv = bm::bit_scan_reverse32(r+1);
1448
1449 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1450 int64_t half_c = c >> 1; // c / 2;
1451 int64_t half_r = r >> 1; // r / 2;
1452 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1453 int64_t hi1 = half_r + half_c + 1;
1454 val = get_bits(logv);
1455 if (val <= lo1 || val >= hi1)
1456 val += (get_bits(1) << logv);
1457 BM_ASSERT(val <= r);
1458 }
1459 else
1460 {
1461 val = 0;
1462 }
1463 }
1464
1465 unsigned mid_idx = sz >> 1;
1466 val += lo + mid_idx;
1467 arr[mid_idx] = val;
1468 if (sz == 1)
1469 return;
1470
1471 bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1472 // tail recursive call:
1473 // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1474 arr += mid_idx + 1;
1475 sz -= mid_idx + 1;
1476 lo = val + 1;
1477 } // for sz
1478}
1479
1480// ----------------------------------------------------------------------
1481
1482template<class TDecoder>
1484 bm::gap_word_t lo,
1486{
1487 for (;sz;)
1488 {
1489 BM_ASSERT(lo <= hi);
1490
1491 unsigned val;
1492
1493 // read the interpolated value
1494 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1495 {
1496 unsigned r = hi - lo - sz + 1;
1497 if (r)
1498 {
1499 unsigned logv = bm::bit_scan_reverse32(r+1);
1500
1501 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1502 int64_t half_c = c >> 1; // c / 2;
1503 int64_t half_r = r >> 1; // r / 2;
1504 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1505 int64_t hi1 = half_r + half_c + 1;
1506 val = get_bits(logv);
1507 if (val <= lo1 || val >= hi1)
1508 val += (get_bits(1) << logv);
1509 BM_ASSERT(val <= r);
1510 }
1511 else
1512 {
1513 val = 0;
1514 }
1515 }
1516
1517 unsigned mid_idx = sz >> 1;
1518 val += lo + mid_idx;
1519 arr[mid_idx] = bm::gap_word_t(val);
1520 if (sz == 1)
1521 return;
1522
1523 bic_decode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1524 // tail recursive call:
1525 // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1526 arr += mid_idx + 1;
1527 sz -= mid_idx + 1;
1528 lo = bm::gap_word_t(val + 1);
1529 } // for sz
1530}
1531
1532// ----------------------------------------------------------------------
1533
1534template<class TDecoder>
1536 bm::gap_word_t lo,
1538{
1539 for (;sz;)
1540 {
1541 BM_ASSERT(lo <= hi);
1542
1543 unsigned val;
1544
1545 // read the interpolated value
1546 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1547 {
1548 unsigned r = hi - lo - sz + 1;
1549 if (r)
1550 {
1551 unsigned logv = bm::bit_scan_reverse32(r+1);
1552
1553 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1554 int64_t half_c = c >> 1; // c / 2;
1555 int64_t half_r = r >> 1; // r / 2;
1556 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1557 int64_t hi1 = half_r + half_c + 1;
1558 val = get_bits(logv);
1559 if (val <= lo1 || val >= hi1)
1560 val += (get_bits(1) << logv);
1561 BM_ASSERT(val <= r);
1562 }
1563 else
1564 {
1565 val = 0;
1566 }
1567 }
1568
1569 unsigned mid_idx = sz >> 1;
1570 val += lo + mid_idx;
1571
1572 // set bit in the target block
1573 {
1574 unsigned nword = (val >> bm::set_word_shift);
1575 block[nword] |= (1u << (val & bm::set_word_mask));
1576 }
1577
1578 if (sz == 1)
1579 return;
1580
1581 bic_decode_u16_cm_bitset(block, mid_idx, lo, bm::gap_word_t(val-1));
1582 // tail recursive call:
1583 // bic_decode_u32_cm(block, sz - mid_idx - 1, val + 1, hi);
1584 sz -= mid_idx + 1;
1585 lo = bm::gap_word_t(val + 1);
1586 } // for sz
1587}
1588
1589// ----------------------------------------------------------------------
1590
1591template<class TDecoder>
1593 bm::gap_word_t lo,
1595{
1596 for (;sz;)
1597 {
1598 BM_ASSERT(lo <= hi);
1599
1600 unsigned val;
1601
1602 // read the interpolated value
1603 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1604 {
1605 unsigned r = hi - lo - sz + 1;
1606 if (r)
1607 {
1608 unsigned logv = bm::bit_scan_reverse32(r+1);
1609
1610 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1611 int64_t half_c = c >> 1; // c / 2;
1612 int64_t half_r = r >> 1; // r / 2;
1613 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1614 int64_t hi1 = half_r + half_c + 1;
1615 val = get_bits(logv);
1616 if (val <= lo1 || val >= hi1)
1617 val += (get_bits(1) << logv);
1618 BM_ASSERT(val <= r);
1619 }
1620 else
1621 {
1622 val = 0;
1623 }
1624 }
1625
1626 unsigned mid_idx = sz >> 1;
1627 val += lo + mid_idx;
1628
1629 if (sz == 1)
1630 return;
1631
1632 bic_decode_u16_cm_dry(mid_idx, lo, bm::gap_word_t(val-1));
1633 // tail recursive call:
1634 // bic_decode_u32_cm_dry(sz - mid_idx - 1, val + 1, hi);
1635 sz -= mid_idx + 1;
1636 lo = bm::gap_word_t(val + 1);
1637 } // for sz
1638}
1639
1640
1641// ----------------------------------------------------------------------
1642
1643template<class TDecoder>
1645 bm::gap_word_t lo,
1647{
1648 for (;sz;)
1649 {
1650 BM_ASSERT(lo <= hi);
1651
1652 unsigned val;
1653 // read the value
1654 {
1655 unsigned r = hi - lo - sz + 1;
1656 if (r)
1657 {
1658 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1659 val = get_bits(logv);
1660 BM_ASSERT(val <= r);
1661 }
1662 else
1663 {
1664 val = 0;
1665 }
1666 }
1667 unsigned mid_idx = sz >> 1;
1668 val += lo + mid_idx;
1669 BM_ASSERT(val < 65536);
1670 BM_ASSERT(mid_idx < 65536);
1671
1672 // set bit in the target block
1673 {
1674 unsigned nword = (val >> bm::set_word_shift);
1675 block[nword] |= (1u << (val & bm::set_word_mask));
1676 }
1677
1678 if (sz == 1)
1679 return;
1680 bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
1681 // tail recursion of:
1682 //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1683 sz -= mid_idx + 1;
1684 lo = bm::gap_word_t(val + 1);
1685 } // for sz
1686}
1687
1688// ----------------------------------------------------------------------
1689
1690template<class TDecoder>
1692 bm::gap_word_t lo,
1694{
1695 for (;sz;)
1696 {
1697 BM_ASSERT(lo <= hi);
1698
1699 unsigned val;
1700 // read the value
1701 {
1702 unsigned r = hi - lo - sz + 1;
1703 if (r)
1704 {
1705 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1706 val = get_bits(logv);
1707 BM_ASSERT(val <= r);
1708 }
1709 else
1710 {
1711 val = 0;
1712 }
1713 }
1714 unsigned mid_idx = sz >> 1;
1715 val += lo + mid_idx;
1716 BM_ASSERT(val < 65536);
1717 BM_ASSERT(mid_idx < 65536);
1718
1719 if (sz == 1)
1720 return;
1721 bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
1722 sz -= mid_idx + 1;
1723 lo = bm::gap_word_t(val + 1);
1724 } // for sz
1725}
1726
1727
1728
1729// ----------------------------------------------------------------------
1730
1731template<class TDecoder>
1733{
1734 unsigned acc = accum_;
1735 unsigned used = used_bits_;
1736
1737 if (used == (sizeof(acc) * 8))
1738 {
1739 acc = src_.get_32();
1740 used ^= used;
1741 }
1742 unsigned zero_bits = 0;
1743 while (true)
1744 {
1745 if (acc == 0)
1746 {
1747 zero_bits = unsigned(zero_bits +(sizeof(acc) * 8) - used);
1748 used = 0;
1749 acc = src_.get_32();
1750 continue;
1751 }
1752 unsigned first_bit_idx =
1753 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
1754 bm::bsf_asm32(acc);
1755 #else
1756 bm::bit_scan_fwd(acc);
1757 #endif
1758 acc >>= first_bit_idx;
1759 zero_bits += first_bit_idx;
1760 used += first_bit_idx;
1761 break;
1762 } // while
1763
1764 // eat the border bit
1765 //
1766 if (used == (sizeof(acc) * 8))
1767 {
1768 acc = src_.get_32();
1769 used = 1;
1770 }
1771 else
1772 {
1773 ++used;
1774 }
1775 acc >>= 1;
1776
1777 // get the value
1778 unsigned current;
1779
1780 unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1781 if (zero_bits <= free_bits)
1782 {
1783 take_accum:
1784 current =
1785 (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
1786 acc >>= zero_bits;
1787 used += zero_bits;
1788 goto ret;
1789 }
1790
1791 if (used == (sizeof(acc) * 8))
1792 {
1793 acc = src_.get_32();
1794 used ^= used;
1795 goto take_accum;
1796 }
1797
1798 // take the part
1799 current = acc;
1800 // read the next word
1801 acc = src_.get_32();
1802 used = zero_bits - free_bits;
1803 current |=
1804 ((acc & block_set_table<true>::_left[used]) << free_bits) |
1805 (1 << zero_bits);
1806
1807 acc >>= used;
1808ret:
1809 accum_ = acc;
1810 used_bits_ = used;
1811 return current;
1812}
1813
1814// ----------------------------------------------------------------------
1815
1816template<class TDecoder>
1818{
1819 BM_ASSERT(count);
1820 const unsigned maskFF = ~0u;
1821 unsigned acc = accum_;
1822 unsigned used = used_bits_;
1823
1824 unsigned value = 0;
1825 unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1826 if (count <= free_bits)
1827 {
1828 take_accum:
1829 value = acc & (maskFF >> (32 - count));
1830 acc >>= count;
1831 used += count;
1832 goto ret;
1833 }
1834 if (used == (sizeof(acc) * 8))
1835 {
1836 acc = src_.get_32();
1837 used = 0;
1838 goto take_accum;
1839 }
1840 value = acc;
1841 acc = src_.get_32();
1842 used = count - free_bits;
1843 value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1844 acc >>= used;
1845ret:
1846 accum_ = acc;
1847 used_bits_ = used;
1848 return value;
1849}
1850
1851// ----------------------------------------------------------------------
1852
1853} // namespace bm
1854
1855#ifdef _MSVC_VER
1856#pragma warning(pop)
1857#endif
1858
1859#endif
#define BMNOEXCEPT
Definition bmdef.h:79
#define BMFORCEINLINE
Definition bmdef.h:203
#define BM_ASSERT
Definition bmdef.h:130
Bit manipulation primitives (internal)
Byte based reader for un-aligned bit streaming.
Definition encoding.h:249
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition encoding.h:1732
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:1483
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:270
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition encoding.h:1592
bit_in(TDecoder &decoder) BMNOEXCEPT
Definition encoding.h:251
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
Definition encoding.h:1431
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:1388
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:275
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition encoding.h:1691
unsigned get_bits(unsigned count) BMNOEXCEPT
read number of bits out of the stream
Definition encoding.h:1817
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Definition encoding.h:1535
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Definition encoding.h:1644
Byte based writer for un-aligned bit streaming.
Definition encoding.h:175
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints)
Definition encoding.h:1175
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
Definition encoding.h:222
bit_out(TEncoder &dest)
Definition encoding.h:177
void put_zero_bits(unsigned count) BMNOEXCEPT
issue specified number of 0s
Definition encoding.h:1073
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition encoding.h:1210
void put_bit(unsigned value) BMNOEXCEPT
issue single bit into encode bit-stream
Definition encoding.h:1011
void put_bits(unsigned value, unsigned count) BMNOEXCEPT
issue count bits out of value
Definition encoding.h:1022
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition encoding.h:1103
void put_zero_bit() BMNOEXCEPT
issue 0 into output stream
Definition encoding.h:1064
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints) cm - "center-minimal".
Definition encoding.h:1336
Base class for all decoding functionality.
Definition encoding.h:84
const unsigned char * start_
Definition encoding.h:107
decoder_base(const unsigned char *buf) BMNOEXCEPT
Definition encoding.h:86
const unsigned char * buf_
Definition encoding.h:106
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition encoding.h:101
void seek(int delta) BMNOEXCEPT
change current position
Definition encoding.h:95
size_t size() const BMNOEXCEPT
Returns size of the current decoding stream.
Definition encoding.h:92
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition encoding.h:89
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition encoding.h:618
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition encoding.h:104
Class for decoding data from memory buffer.
Definition encoding.h:152
decoder_little_endian(const unsigned char *buf)
Definition encoding.h:868
bool get_32_OR(bm::word_t *w, unsigned count)
Definition encoding.h:951
void get_32_AND(bm::word_t *w, unsigned count)
Definition encoding.h:973
Class for decoding data from memory buffer.
Definition encoding.h:118
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:668
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:655
bool get_32_OR(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
Definition encoding.h:761
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:703
void get_32_AND(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
Definition encoding.h:802
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition encoding.h:639
decoder(const unsigned char *buf) BMNOEXCEPT
Construction.
Definition encoding.h:630
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:686
Memory encoding.
Definition encoding.h:50
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition encoding.h:485
void put_48(bm::id64_t w) BMNOEXCEPT
Puts 48 bits word into encoding buffer.
Definition encoding.h:545
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition encoding.h:493
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition encoding.h:562
encoder(unsigned char *buf, size_t size) BMNOEXCEPT
Construction.
Definition encoding.h:384
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition encoding.h:420
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition encoding.h:501
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition encoding.h:403
unsigned char * position_type
Definition encoding.h:52
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition encoding.h:472
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition encoding.h:527
void put_24(bm::word_t w) BMNOEXCEPT
Puts 24 bits word into encoding buffer.
Definition encoding.h:511
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition encoding.h:430
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition encoding.h:392
Elias Gamma decoder.
Definition bmgamma.h:43
void stop()
Stop decoding sequence.
Definition encoding.h:360
void start()
Start encoding sequence.
Definition encoding.h:355
T operator()(void)
Decode word.
Definition encoding.h:365
gamma_decoder(TBitIO &bin)
Definition encoding.h:350
Functor for Elias Gamma encoding.
Definition encoding.h:327
gamma_encoder(TBitIO &bout)
Definition encoding.h:329
void operator()(T value)
Encode word.
Definition encoding.h:333
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
OR array elements against another array (unaligned) dst |= *src.
Definition bmsse_util.h:342
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
Definition bmsse_util.h:175
decoder decoder_big_endian
Class for decoding data from memory buffer.
Definition encoding.h:140
Definition bm.h:77
unsigned int word_t
Definition bmconst.h:38
unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
Definition bmutil.h:301
const unsigned set_word_shift
Definition bmconst.h:71
unsigned long long int id64_t
Definition bmconst.h:34
unsigned short gap_word_t
Definition bmconst.h:77
T bit_scan_fwd(T v) BMNOEXCEPT
Definition bmutil.h:294
const unsigned set_word_mask
Definition bmconst.h:72
unsigned short short_t
Definition bmconst.h:39
Structure keeps all-left/right ON bits masks.
Definition bmconst.h:343