BitMagic-C++
xsample08.cpp
Go to the documentation of this file.
1/*
2Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16For more information please visit: http://bitmagic.io
17*/
18
19/** \example xsample08.cpp
20
21 Example on intervals and how to use it for layout calculation.
22 As a use case this example uses genomics visualization for
23 features mapped into genomic coordinates.
24
25 It is also illustartes vector model using coordinate ranges
26 or feature vectors. Various properties of the initial model acn be
27 dropped (sliced) to improve memory efficiency, better storage
28 or network transfer.
29
30 This example does NOT do serialization of models (which is possible)
31 for the clarity of the sample code.
32
33 \sa bm::bvector::set_range
34 \sa bm::bvector::any_range
35 \sa bm::bvector::copy_range
36 \sa bm::interval_enumerator
37 \sa bm::rsc_sparse_vector
38 \sa bm::rsc_sparse_vector::copy_range
39 \sa bm::find_interval_start
40 \sa bm::find_interval_end
41
42 \sa sample22.cpp
43 \sa sample23.cpp
44 \sa bvintervals
45*/
46
47/*! \file sample23.cpp
48 \brief Example: interval_enumerator<> - interator class for intervals
49*/
50
51#include <iostream>
52#include <utility>
53#include <vector>
54#include <memory>
55#include <cassert>
56
57#include "bm.h"
58#include "bmintervals.h"
59#include "bmsparsevec_compr.h"
60
61using namespace std;
62
64typedef std::vector<std::unique_ptr<bm::bvector<> > > layout_vector_type;
65
68typedef std::vector<std::unique_ptr<rsc_vector_u8> > starnds_vector_type;
69
70
71// -------------------------------------------------------------------
72
73/// Data frame object, sued to buid succinct data model
74///
75///
77{
78 /// Optimize memory layoput, build index for faster read access
79 ///
80 void optimize();
81
82 void add_layout(size_t plane, bm::bvector<>* bv);
83 void add_strand(size_t plane, rsc_vector_u8* strand);
84
85 layout_vector_type layout_v; ///< layout vector
86 starnds_vector_type strand_v; ///< strand planes vector
87};
88
90{
91 BM_DECLARE_TEMP_BLOCK(tb); // explicit temp for faster optimization
92 for (size_t i = 0; i < layout_v.size(); ++i)
93 {
94 auto bv = layout_v[i].get();
95 if (bv)
96 bv->optimize(tb); // memory optimization
97 } // for i
98 for (size_t i = 0; i < strand_v.size(); ++i)
99 {
100 auto strand_plane = strand_v[i].get();
101 if (strand_plane)
102 {
103 strand_plane->optimize(tb);
104 strand_plane->sync(); // build rank-select idx (faster read access)
105 }
106 } // for i
107}
108
110{
111 unique_ptr<bm::bvector<> > ap(bv);
112 if (layout_v.size() == plane) // push back requested
113 {
114 layout_v.emplace_back(move(ap));
115 }
116 else
117 {
118 while (layout_v.size() < plane) // this is crude resize() but it would do
119 layout_v.emplace_back(new bm::bvector<>(bm::BM_GAP));
120 layout_v[plane] = std::move(ap);
121 }
122}
123
124void data_model::add_strand(size_t plane, rsc_vector_u8* strand)
125{
126 unique_ptr<rsc_vector_u8 > ap(strand);
127 if (strand_v.size() == plane) // push back requested
128 {
129 strand_v.emplace_back(move(ap));
130 }
131 else
132 {
133 while (strand_v.size() < plane) // this is crude resize() but it would do
134 strand_v.emplace_back(new rsc_vector_u8());
135 strand_v[plane] = std::move(ap);
136 }
137}
138
139
140// -------------------------------------------------------------------
141
142void set_feature_strand(data_model& dm, size_t plane,
144 unsigned char strand)
145{
146 if (!strand)
147 return;
148 while (dm.strand_v.size() <= plane) // add planes
149 {
150 std::unique_ptr<rsc_vector_u8> p2(new rsc_vector_u8());
151 dm.strand_v.emplace_back(move(p2));
152 }
153
154 rsc_vector_u8* strand_plane = dm.strand_v[plane].get();
155 if (!strand_plane)
156 {
157 strand_plane = new rsc_vector_u8();
158 dm.strand_v[plane] = unique_ptr<rsc_vector_u8 >(strand_plane);
159 }
160 assert(strand_plane->is_null(pos));
161 strand_plane->set(pos, strand);
162}
163
164/// Register new object in the data model: [start..end] + strand
165///
167 unsigned start, unsigned end,
168 unsigned char strand)
169{
170 assert(start <= end);
171
172 bm::bvector<>* bv; // layout plane vector
173
174 for (size_t i = 0; i < dm.layout_v.size(); ++i)
175 {
176 bv = dm.layout_v[i].get();
177 if (!bv)
178 {
179 bv = new bm::bvector<>(bm::BM_GAP);
180 dm.layout_v[i] = unique_ptr<bm::bvector<> >(bv);
181 // bv just created (empty) no need to do range check
182 bv->set_range(start, end);
183 set_feature_strand(dm, i, start, strand);
184
185 return;
186 }
187 if (!bv->any_range(start, end)) // check if layout space is not used
188 {
189 bv->set_range(start, end); // add [start..end] coordinates
190 // set strand at the start of feature
191 set_feature_strand(dm, i, start, strand);
192
193 return;
194 }
195 } // for i
196
197 // not found, make new plane
198 //
199 bv = new bm::bvector<>(bm::BM_GAP);
200 dm.layout_v.emplace_back(std::unique_ptr<bm::bvector<> >(bv));
201 bv->set_range(start, end);
202 set_feature_strand(dm, dm.layout_v.size()-1, start, strand);
203}
204
205/// Data model splicer
206///
207
208void splice_model(data_model& dm_target, const data_model& dm,
211 bool copy_strands)
212{
213 const bm::bvector<>* bv; // layout
214 const rsc_vector_u8* strand_plane;
215
216 size_t t_plane = 0;
217 for (size_t i = 0; i < dm.layout_v.size(); ++i)
218 {
219 bv = dm.layout_v[i].get();
220 if (bv)
221 {
222 bm::bvector<>::size_type start_pos;
224
225 bool found = bm::find_interval_start(*bv, start, start_pos);
226 if (!found)
227 start_pos = start;
228 found = bm::find_interval_end(*bv, end, end_pos);
229 if (!found)
230 end_pos = end;
231
232 unique_ptr<bm::bvector<>> bv_ptr(new bm::bvector<>(bm::BM_GAP));
233 bv_ptr->copy_range(*bv, start_pos, end_pos);
234 if (bv_ptr->any()) // copy range may have ended as empty
235 {
236 dm_target.add_layout(t_plane, bv_ptr.release());
237
238 // slice the strands plane (if requested)
239 //
240 if (copy_strands)
241 {
242 if (i < dm.strand_v.size())
243 {
244 strand_plane = dm.strand_v[i].get();
245 if (strand_plane)
246 {
247 unique_ptr<rsc_vector_u8> strand_ptr(new rsc_vector_u8());
248 strand_ptr->copy_range(*strand_plane, start_pos, end_pos);
249 dm_target.add_strand(t_plane, strand_ptr.release());
250 }
251 }
252 }
253 ++t_plane;
254 } // if any()
255
256 } // if bv
257 } // for i
258
259}
260
261
262/// This is ASCII art "renderer" for the data model.
263/// illustrates how to manipulate succinct data model to create graphics
264///
265void print_model(const data_model& dm)
266{
267 const bm::bvector<>* bv; // layout
268 const rsc_vector_u8* strand_plane;
269
270 // Sequence on top is for purely decorative purposes
271 cout <<
272 "-------------------------------------------------------------------------"
273 << endl <<
274 "ATGTTAGCCCGCGCATATTATATATGTAGCGTATTAAGCGDGGAGATTACCCTTGCATTAGGTTANNNNNNNN"
275 << endl <<
276 "-------------------------------------------------------------------------"
277 << endl;
278
279 for (size_t i = 0; i < dm.layout_v.size(); ++i)
280 {
281 bv = dm.layout_v[i].get();
282 if (bv)
283 {
284 strand_plane = i < dm.strand_v.size() ? dm.strand_v[i].get() : nullptr;
286 if (ien.valid())
287 {
288 bm::bvector<>::size_type spaces = 0;
289 do
290 {
291 auto st = ien.start(); auto end = ien.end();
292 char ch_strand = '?';
293 if (strand_plane)
294 {
295 auto strand = strand_plane->get(st);
296 switch (strand)
297 {
298 case 0: ch_strand = '>'; break; // positive
299 case 1: ch_strand = '<'; break; // negative
300 default: break; // unknown strand
301 }
302 }
303 for (; spaces < st; ++spaces)
304 cout << " ";
305 for (bool first = true; st <= end; ++st, first = false)
306 {
307 if (st == end)
308 cout << ch_strand;
309 else
310 cout << (first ? ch_strand : '.');
311 } // for
312 spaces = end+1;
313 } while (ien.advance());
314 cout << endl;
315 }
316 }
317 } // for
318}
319
321
322
323
324int main(void)
325{
326 try
327 {
328 data_model dm;
329
330 // build the data model using succinct vectors
331 //
332 add_object(dm, 0, 0, negative);
333 add_object(dm, 5, 10, positive);
334 add_object(dm, 4, 70, negative);
335 add_object(dm, 15, 20, negative);
336 add_object(dm, 20, 30, positive);
337 add_object(dm, 16, 21, unknown);
338
339 dm.optimize(); // run compression and build access index
340
341 // View the model using toy ASCII art renderer
342 //
343 print_model(dm);
344
345 // create a model splice for [5..10] range
346 // plus drop strand property (renderer will assume unknown)
347 //
348 {
349 data_model dm_splice;
350
351 splice_model(dm_splice, dm, 5, 10, false);
352 dm_splice.optimize();
353
354 cout << endl;
355 print_model(dm_splice);
356 }
357
358 // create a model splice for [5..10] range
359 // now WITH strand property
360 //
361 {
362 data_model dm_splice;
363
364 splice_model(dm_splice, dm, 5, 10, true);
365 dm_splice.optimize();
366
367 cout << endl;
368 print_model(dm_splice);
369 }
370 }
371 catch(std::exception& ex)
372 {
373 std::cerr << ex.what() << std::endl;
374 return 1;
375 }
376
377 return 0;
378}
379
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition bm.h:47
Algorithms for bit ranges and intervals.
Compressed sparse container rsc_sparse_vector<> for integer types.
bool any_range(size_type left, size_type right) const BMNOEXCEPT
Returns true if any bits in the range are 1s (non-empty interval) Function uses closed interval [left...
Definition bm.h:2865
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left,right] Interval must be inside the bvector's siz...
Definition bm.h:2158
forward iterator class to traverse bit-vector as ranges
Definition bmintervals.h:53
size_type end() const BMNOEXCEPT
Return interval end/right as bit-vector coordinate 011110 [left..right].
bool valid() const BMNOEXCEPT
Returns true if enumerator is valid (false if traversal is done)
size_type start() const BMNOEXCEPT
Return interval start/left as bit-vector coordinate 011110 [left..right].
Rank-Select compressed sparse vector.
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
sparse vector with runtime compression using bit transposition method
Definition bmsparsevec.h:82
@ BM_GAP
GAP compression is ON.
Definition bmconst.h:144
bool find_interval_end(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
bool find_interval_start(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Data frame object, sued to buid succinct data model.
Definition xsample08.cpp:77
void optimize()
Optimize memory layoput, build index for faster read access.
Definition xsample08.cpp:89
void add_strand(size_t plane, rsc_vector_u8 *strand)
void add_layout(size_t plane, bm::bvector<> *bv)
layout_vector_type layout_v
layout vector
Definition xsample08.cpp:85
starnds_vector_type strand_v
strand planes vector
Definition xsample08.cpp:86
bm::sparse_vector< unsigned char, bm::bvector<> > sparse_vector_u8
Definition xsample08.cpp:66
bm::interval_enumerator< bm::bvector<> > interval_enumerator_type
Definition xsample08.cpp:63
std::vector< std::unique_ptr< rsc_vector_u8 > > starnds_vector_type
Definition xsample08.cpp:68
bm::rsc_sparse_vector< unsigned char, sparse_vector_u8 > rsc_vector_u8
Definition xsample08.cpp:67
Strand
@ unknown
@ negative
@ positive
void splice_model(data_model &dm_target, const data_model &dm, bm::bvector<>::size_type start, bm::bvector<>::size_type end, bool copy_strands)
Data model splicer.
void add_object(data_model &dm, unsigned start, unsigned end, unsigned char strand)
Register new object in the data model: [start..end] + strand.
int main(void)
void set_feature_strand(data_model &dm, size_t plane, bm::bvector<>::size_type pos, unsigned char strand)
std::vector< std::unique_ptr< bm::bvector<> > > layout_vector_type
Definition xsample08.cpp:64
void print_model(const data_model &dm)
This is ASCII art "renderer" for the data model.