1#ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2#define BMSPARSEVEC_ALGO__H__INCLUDED__
24#ifndef BM__H__INCLUDED__
27# error missing include (bm.h or bm64.h)
38# pragma warning( disable: 4146 )
66 unsigned sv_plains = svect.plains();
68 BM_ASSERT(sv_plains > high_bit && high_bit > 0);
70 typename SV::bvector_type bv_acc;
74 for (i = high_bit+1; i < sv_plains; ++i)
76 typename SV::bvector_type* bv_plain = svect.plain(i);
79 bv_acc.bit_or(*bv_plain);
85 for (i = high_bit;
true; --i)
87 typename SV::bvector_type* bv_plain = svect.get_plain(i);
88 bv_plain->bit_or(bv_acc);
107 if (low_bit == 0)
return;
110 unsigned sv_plains = svect.plains();
111 typename SV::bvector_type bv_acc1;
115 for (i = low_bit+1; i < sv_plains; ++i)
117 typename SV::bvector_type* bv_plain = svect.plain(i);
119 bv_acc1.bit_or(*bv_plain);
123 typename SV::bvector_type bv_acc2;
124 typename SV::bvector_type* bv_low_plain = svect.get_plain(low_bit);
126 for (i = low_bit-1;
true; --i)
128 typename SV::bvector_type* bv_plain = svect.plain(i);
131 bv_acc2.bit_or(*bv_plain);
144 bv_acc1.bit_xor(bv_acc2);
145 bv_low_plain->bit_or(bv_acc1);
172 typename SV::size_type& midx,
180 unsigned plains1 = sv1.plains();
193 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
194 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
195 if (bv_null1 && bv_null2)
197 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
198 if (f && (midx < mismatch))
200 found = f; mismatch = midx;
208 typename SV::bvector_type bv_tmp;
209 bv_tmp.resize(sv2.size());
213 bool f = bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
214 if (f && (midx < mismatch))
216 found = f; mismatch = midx;
221 typename SV::bvector_type bv_tmp;
222 bv_tmp.resize(sv1.size());
225 bool f = bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
226 if (f && (midx < mismatch))
228 found = f; mismatch = midx;
235 for (
unsigned i = 0; mismatch & (i < plains1); ++i)
237 typename SV::bvector_type_const_ptr bv1 = sv1.get_plain(i);
238 typename SV::bvector_type_const_ptr bv2 = sv2.get_plain(i);
243 bool f = bv2->find(midx);
244 if (f && (midx < mismatch))
246 found = f; sv_idx = 2; mismatch = midx;
253 bool f = bv1->find(midx);
254 if (f && (midx < mismatch))
256 found = f; sv_idx = 1; mismatch = midx;
262 bool f = bv1->find_first_mismatch(*bv2, midx, mismatch);
263 if (f && (midx < mismatch))
265 found = f; mismatch = midx;
268 sv_idx = (bv1->test(mismatch)) ? 1 : 2;
285 found = sv1.find_rank(midx + 1, mismatch);
288 found = sv2.find_rank(midx + 1, mismatch);
300 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
301 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
302 found = bv_null1->find_first_mismatch(*bv_null2, mismatch);
329template<
typename SV1,
typename SV2>
337 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
339 allocator_pool_type pool;
355 unsigned plains = sv1.plains();
356 if (plains < sv2.plains())
357 plains = sv2.plains();
359 for (
unsigned i = 0; i < plains; ++i)
361 typename SV1::bvector_type_const_ptr bv1 = sv1.get_plain(i);
362 typename SV2::bvector_type_const_ptr bv2 = sv2.get_plain(i);
384 bv_xor.
bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
391 typename SV1::size_type sz1 = sv1.size();
392 typename SV2::size_type sz2 = sv2.size();
402 bv.set_range(sz1, sz2-1);
408 typename SV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
409 typename SV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
416 if (bv_null1 && bv_null2)
434 if (bv_null1 && bv_null2)
451 bv_null.
resize(sv1.size());
456 bv_null.
resize(sv2.size());
502 void bind(
const SV& sv,
bool sorted);
630 template<typename It>
641 bool any_zero =
false;
642 for (; start < end; ++start)
645 any_zero |= (v == 0);
662 typename SV::value_type value,
663 typename SV::bvector_type& bv_out);
669 void correct_nulls(
const SV& sv,
typename SV::bvector_type& bv_out);
681 typename SV::value_type value,
682 typename SV::bvector_type& bv_out,
683 typename SV::size_type search_limit = 0);
687 typename SV::value_type value,
692 const typename SV::value_type* str,
699 typename SV::value_type value);
703 const typename SV::value_type* str,
704 unsigned octet_start);
707 void decompress(
const SV& sv,
typename SV::bvector_type& bv_out);
733 typedef bm::heap_matrix<
typename SV::value_type,
754 value_type remap_value_vect_[SV::max_vector_size];
756 bm::id64_t vector_plain_masks_[SV::max_vector_size];
835 remap(bv_in, sv_brel, bv_out);
848 void attach_sv(
const SV* sv_brel,
bool compute_stats =
false);
857 template<
unsigned BSIZE>
894: sv_ptr_(0), gb_(0), have_stats_(false)
899 SV::throw_bad_alloc();
925 if (sv_brel->empty() || !compute_stats)
927 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
951 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
954 if (!bv_non_null->test(id_from))
957 size_type idx = sv_brel.translate_address(id_from);
958 id_to = sv_brel.get(idx);
976 remap(bv_in, bv_out);
989 if (sv_ptr_ == 0 || sv_ptr_->empty())
1002 const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
1006 bv_product_ = bv_in;
1007 bv_product_.bit_and(*bv_non_null);
1008 enum_bv = &bv_product_;
1019 bv_product_ = bv_in;
1021 bv_product_.bit_sub(bv_zero_);
1027 bv_out.set_bit_no_check(0);
1029 enum_bv = &bv_product_;
1036 buf_cnt = nb_old = 0;
1039 for (; en.
valid(); ++en)
1041 typename SV::size_type idx = *en;
1042 idx = sv_ptr_->translate_address(idx);
1049 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1050 bv_out.set(&gb_->buffer_[0], buf_cnt,
BM_SORTED);
1054 gb_->gather_idx_[buf_cnt++] = idx;
1058 gb_->gather_idx_[buf_cnt++] = idx;
1061 if (buf_cnt == sv_g_size)
1063 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1070 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1078template<
typename SV>
1083 if (sv_brel.empty())
1088 typename SV::const_iterator it = sv_brel.begin();
1089 for (; it.valid(); ++it)
1091 typename SV::value_type t_id = *it;
1093 if (bv_in.test(idx))
1095 bv_out.set_bit_no_check(t_id);
1105template<
typename SV>
1112 effective_str_max_ = 0;
1117template<
typename SV>
1126 effective_str_max_ = sv.effective_vector_max();
1128 block0_elements_cache_.resize(total_nb, effective_str_max_+1);
1129 block0_elements_cache_.set_zero();
1131 block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
1132 block3_elements_cache_.set_zero();
1138 value_type* s0 = block0_elements_cache_.row(nb);
1139 sv.get(i, s0,
size_type(block0_elements_cache_.cols()));
1143 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1145 sv.get(idx, s1,
size_type(block3_elements_cache_.cols()));
1151 for (
unsigned i = 0; i < SV::max_vector_size; ++i)
1153 vector_plain_masks_[i] = sv.get_plains_mask(i);
1160template<
typename SV>
1164 effective_str_max_ = 0;
1169template<
typename SV>
1171 typename SV::bvector_type& bv_out)
1178 find_nonzero(sv, bv_out);
1179 if (sv.is_compressed())
1182 bv_out.set_range(sv.effective_size(),
bm::id_max - 1,
false);
1183 decompress(sv, bv_out);
1189 correct_nulls(sv, bv_out);
1194template<
typename SV>
1210 bv_out.set_range(sv.size(),
bm::id_max - 1,
false);
1216template<
typename SV>
1218 typename SV::bvector_type& bv_out)
1222 bv_out.bit_and(*bv_null);
1227template<
typename SV>
1229 typename SV::value_type value,
1230 typename SV::bvector_type& bv_out,
1231 typename SV::size_type search_limit)
1238 find_zero(sv, bv_out);
1239 return bv_out.any();
1243 bool found = prepare_and_sub_aggregator(sv, value);
1250 bool any = (search_limit == 1);
1251 found = agg_.combine_and_sub(bv_out, any);
1258template<
typename SV>
1260 typename SV::value_type value,
1271 bool found = prepare_and_sub_aggregator(sv, value);
1274 found = agg_.find_first_and_sub(idx);
1282template<
typename SV>
1284 const typename SV::value_type* str,
1296 unsigned common_prefix_len = 0;
1300 agg_.set_range_hint(mask_from_, mask_to_);
1301 common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
1306 str = remap_value_vect_;
1310 if (sv.is_remap() && str != remap_value_vect_)
1312 bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1315 str = remap_value_vect_;
1319 bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
1323 found = agg_.find_first_and_sub(idx);
1330template<
typename SV>
1332 const typename SV::value_type* str,
1333 unsigned octet_start)
1335 unsigned char bits[64];
1338 for (; str[len] != 0; ++len)
1343 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1345 if (
unsigned(octet_idx) < octet_start)
1348 unsigned value = unsigned(str[octet_idx]) & 0xFF;
1352 if (&sv == bound_sv_)
1353 plains_mask = vector_plain_masks_[octet_idx];
1355 plains_mask = sv.get_plains_mask(
unsigned(octet_idx));
1357 if ((value & plains_mask) != value)
1362 unsigned short bit_count_v =
bm::bitscan(value, bits);
1363 for (
unsigned i = 0; i < bit_count_v; ++i)
1365 unsigned bit_idx = bits[i];
1367 unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1372 unsigned vinv = unsigned(value);
1383 vinv &= unsigned(plains_mask);
1384 for (
unsigned octet_plain = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1388 unsigned plain_idx = octet_plain + bit_idx;
1397 unsigned plain_idx = unsigned(len * 8) + 1;
1398 typename SV::size_type plains;
1399 if (&sv == bound_sv_)
1400 plains = effective_str_max_ * unsigned(
sizeof(
value_type)) * 8;
1402 plains = sv.plains();
1403 for (; plain_idx < plains; ++plain_idx)
1414template<
typename SV>
1416 typename SV::value_type value)
1418 unsigned char bits[
sizeof(value) * 8];
1419 unsigned short bit_count_v =
bm::bitscan(value, bits);
1426 for (
unsigned i = bit_count_v; i > 0; --i)
1428 unsigned bit_idx = bits[i-1];
1437 unsigned sv_plains = sv.effective_plains();
1438 for (
unsigned i = 0; i < sv_plains; ++i)
1442 if (bv && !(value & mask))
1451template<
typename SV>
1453 typename SV::value_type value,
1454 typename SV::bvector_type& bv_out)
1461 find_zero(sv, bv_out);
1465 unsigned char bits[
sizeof(value) * 8];
1466 unsigned short bit_count_v =
bm::bitscan(value, bits);
1471 const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1480 for (
unsigned i = 0; i < bit_count_v; ++i)
1484 bv_out &= *bv_plain;
1493 unsigned sv_plains = sv.effective_plains();
1494 for (
unsigned i = 0; (i < sv_plains) && value; ++i)
1497 if (bv_plain && !(value & (
value_type(1) << i)))
1498 bv_out -= *bv_plain;
1504template<
typename SV>
1506 typename SV::size_type& pos)
1509 return this->find_eq_str(*bound_sv_, str, pos);
1514template<
typename SV>
1516 const typename SV::value_type* str,
1517 typename SV::size_type& pos)
1524 bool remaped =
false;
1527 if (sv.is_remap() && str != remap_value_vect_)
1529 remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1532 str = remap_value_vect_;
1537 found = find_first_eq(sv, str, found_pos, remaped);
1543 if (sv.is_compressed())
1544 found = sv.find_rank(found_pos + 1, pos);
1551 typename SV::bvector_type bv_zero;
1552 find_zero(sv, bv_zero);
1553 found = bv_zero.find(pos);
1560template<
typename SV>
1562 const typename SV::value_type* str,
1563 typename SV::size_type& pos)
1571 bool remaped =
false;
1575 if (sv.is_remap() && str != remap_value_vect_)
1577 remaped = sv.remap_tosv(
1578 remap_value_vect_, SV::max_vector_size, str);
1584 reset_search_range();
1589 l = 0; r = sv.size()-1;
1596 if (dist < min_distance_cutoff)
1609 int cmp = this->compare_str(sv, mid, str);
1627 int cmp = this->compare_str(sv, mid, str);
1632 cmp = this->compare_str(sv, mid, str);
1637 cmp = this->compare_str(sv, mid, str);
1655 set_search_range(l, r);
1659 typename SV::size_type mid = dist/2+l;
1671 int cmp = this->compare_str(sv, mid, str);
1676 set_search_range(l, mid);
1686 found = find_first_eq(sv, str, found_pos, remaped);
1692 if (sv.is_compressed())
1693 found = sv.find_rank(found_pos + 1, pos);
1696 reset_search_range();
1701 typename SV::bvector_type bv_zero;
1702 find_zero(sv, bv_zero);
1703 found = bv_zero.find(pos);
1710template<
typename SV>
1712 typename SV::size_type& pos)
1715 return bfind_eq_str(*bound_sv_, str, pos);
1720template<
typename SV>
1722 const typename SV::value_type val,
1723 typename SV::size_type& pos)
1735 cmp = this->compare(sv, l, val);
1746 cmp = this->compare(sv, r, val);
1754 cmp = this->compare(sv, r, val);
1768 if (dist < linear_cutoff1)
1772 cmp = this->compare(sv, l, val);
1789 cmp = this->compare(sv, mid, val);
1796 cmp = this->compare(sv, i, val);
1810 if (dist < linear_cutoff2)
1812 typename SV::const_iterator it(&sv, l);
1813 for (; it.valid(); ++it, ++l)
1815 typename SV::value_type sv_value = it.value();
1816 if (sv_value == val)
1840template<
typename SV>
1843 const typename SV::value_type* str,
1844 typename SV::size_type& pos)
1857 cmp = this->compare_str(sv, l, str);
1868 cmp = this->compare_str(sv, r, str);
1876 cmp = this->compare_str(sv, r, str);
1890 if (dist < linear_cutoff1)
1894 cmp = this->compare_str(sv, l, str);
1910 cmp = this->compare_str(sv, mid, str);
1917 cmp = this->compare_str(sv, i, str);
1931 if (dist < linear_cutoff2)
1935 dist = sv.decode(hmatr_, l, dist+1);
1936 for (
unsigned i = 0; i < dist; ++i, ++l)
1938 const typename SV::value_type* hm_str = hmatr_.row(i);
1939 cmp = ::strcmp(hm_str, str);
1951 cmp = this->compare_str(sv, l, str);
1970template<
typename SV>
1975 if (bound_sv_ == &sv)
1981 value_type* s0 = block0_elements_cache_.row(nb);
1984 sv.get(idx, s0,
size_type(block0_elements_cache_.cols()));
1987 for (
unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1989 char octet = str[i];
char value = s0[i];
1990 res = (value > octet) - (value < octet);
2002 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
2004 for (
unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
2006 char octet = str[i];
char value = s1[i];
2007 res = (value > octet) - (value < octet);
2016 return sv.compare(idx, str);
2021template<
typename SV>
2027 return sv.compare(idx, val);
2032template<
typename SV>
2034 typename SV::value_type value,
2035 typename SV::bvector_type& bv_out)
2044 find_zero(sv, bv_out);
2048 find_eq_with_nulls(sv, value, bv_out, 0);
2050 decompress(sv, bv_out);
2051 correct_nulls(sv, bv_out);
2056template<
typename SV>
2058 typename SV::value_type value,
2059 typename SV::size_type& pos)
2064 find_eq(sv, value, bv_zero);
2065 bool found = bv_zero.find(pos);
2070 bool found = find_first_eq(sv, value, found_pos);
2076 if (sv.is_compressed())
2077 found = sv.find_rank(found_pos + 1, pos);
2085template<
typename SV>
2087 typename SV::bvector_type& bv_out)
2090 for (
unsigned i = 0; i < sv.plains(); ++i)
2091 agg_.add(sv.get_plain(i));
2092 agg_.combine_or(bv_out);
2098template<
typename SV>
2100 typename SV::bvector_type& bv_out)
2102 if (!sv.is_compressed())
2104 const bvector_type* bv_non_null = sv.get_null_bvector();
2108 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
2109 bv_out.swap(bv_tmp_);
2114template<
typename SV>
2125template<
typename SV>
Algorithms for fast aggregation of N bvectors.
Algorithms for bvector<> (main include)
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
pre-processor un-defines to avoid global space pollution (internal)
Algorithms for fast aggregation of a group of bit-vectors.
Constant iterator designed to enumerate "ON" bits.
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv) BMNOEXCEPT
check if vector has no assigned allocator and set one
Bitvector Bit-vector container with runtime compression of bits.
@ opt_none
no optimization
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
void resize(size_type new_size)
Change size of the bvector.
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand OR : this := bv1 OR bv2
allocator_type::allocator_pool_type allocator_pool_type
bm::bvector< Alloc > & bit_xor(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand XOR : this := bv1 XOR bv2
Algorithms for rank compression of bit-vector.
algorithms for sparse_vector scan/search
bm::heap_matrix< typename SV::value_type, linear_cutoff2, SV::sv_octet_plains, allocator_type > matrix_search_buf_type
void find_nonzero(const SV &sv, typename SV::bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all plains.
bool prepare_and_sub_aggregator(const SV &sv, typename SV::value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bool find_eq_with_nulls(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out, typename SV::size_type search_limit=0)
find value (may include NULL indexes)
allocator_type::allocator_pool_type allocator_pool_type
bool find_first_eq(const SV &sv, typename SV::value_type value, size_type &idx)
find first value (may include NULL indexes)
bvector_type::allocator_type allocator_type
SV::bvector_type bvector_type
const bvector_type * bvector_type_const_ptr
SV::value_type value_type
bool lower_bound_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
lower bound search for an array position
void find_eq_with_nulls_horizontal(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
For testing purposes only.
sparse_vector_scanner(const sparse_vector_scanner &)=delete
int compare(const SV &sv, size_type idx, const value_type val) BMNOEXCEPT
compare sv[idx] with input value
void reset_binding() BMNOEXCEPT
reset sparse vector binding
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
bool lower_bound(const SV &sv, const typename SV::value_type val, typename SV::size_type &pos)
lower bound search for an array position
void operator=(const sparse_vector_scanner &)=delete
void find_zero(const SV &sv, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to 0
void decompress(const SV &sv, typename SV::bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
bvector_type * bvector_type_ptr
void reset_search_range()
reset (disable) search range
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
int compare_str(const SV &sv, size_type idx, const value_type *str)
compare sv[idx] with input str
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
void correct_nulls(const SV &sv, typename SV::bvector_type &bv_out)
Exclude possible NULL values from the result vector.
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string)
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
null_support
NULL-able value support.
@ BM_SORTED
input set is sorted (ascending order)
@ BM_SORTED_UNIFORM
sorted and in one block (internal!)
@ use_null
support "non-assigned" or "NULL" logic
@ no_null
do not support NULL values
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
void sparse_vector_find_mismatch(typename SV1::bvector_type &bv, const SV1 &sv1, const SV2 &sv2, bm::null_support null_proc)
Find mismatch vector, indicating positions of mismatch between two sparse vectors (uses linear scan i...
void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two scalar variables.
const unsigned set_block_mask
unsigned short bitscan(V w, B *bits) BMNOEXCEPT
const unsigned sub_block3_size
unsigned long long int id64_t
const unsigned gap_max_bits
const unsigned set_block_shift
ad-hoc conditional expressions