BitMagic-C++
xsample03.cpp File Reference

Example: SNP search in human genome. More...

#include <iostream>
#include <sstream>
#include <chrono>
#include <regex>
#include <time.h>
#include <stdio.h>
#include <vector>
#include <map>
#include <utility>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmrandom.h"
#include "bmsparsevec.h"
#include "bmsparsevec_compr.h"
#include "bmsparsevec_algo.h"
#include "bmsparsevec_serial.h"
#include "bmalgo_similarity.h"
#include "bmsparsevec_util.h"
#include "bmdbg.h"
#include "bmtimer.h"
Include dependency graph for xsample03.cpp:

Go to the source code of this file.

Typedefs

typedef bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
 
typedef bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32
 
typedef std::vector< std::pair< unsigned, unsigned > > vector_pairs
 

Functions

static void show_help ()
 
static int parse_args (int argc, char *argv[])
 
static int load_snp_report (const std::string &fname, sparse_vector_u32 &sv)
 
static void generate_random_subset (const sparse_vector_u32 &sv, std::vector< unsigned > &vect, unsigned count)
 
static void build_vector_pairs (const sparse_vector_u32 &sv, vector_pairs &vp)
 
static bool search_vector_pairs (const vector_pairs &vp, unsigned rs_id, unsigned &pos)
 
static void run_benchmark (const sparse_vector_u32 &sv, const rsc_sparse_vector_u32 &csv)
 
int main (int argc, char *argv[])
 

Variables

std::string sv_out_name
 
std::string rsc_out_name
 
std::string sv_in_name
 
std::string rsc_in_name
 
std::string isnp_name
 
bool is_diag = false
 
bool is_timing = false
 
bool is_bench = false
 
bm::chrono_taker::duration_map_type timing_map
 

Detailed Description

Example: SNP search in human genome.

Brief description of the method used:

  1. Parse SNP chromosome report and extract information about SNP number and location in the chromosome
  2. Use this information to build a bit-transposed sparse_vector<> where the vector position matches the chromosome position and the SNP ids (aka rsid) are kept as a bit-transposed matrix
  3. Build a rank-select compressed sparse vector, dropping all NULL columns (this data format is pretty sparse, since the number of SNPs is significantly less than the number of chromosome bases: 1:5 or less). Use the memory report to understand the memory footprint of each form of storage
  4. Run benchmarks searching for 500 randomly selected SNPs using
    • bm::sparse_vector<>
    • bm::rsc_sparse_vector<>
    • std::vector<pair<unsigned, unsigned> >

This example should be useful for construction of compressed columnar tables with parallel search capabilities.

Definition in file xsample03.cpp.

Typedef Documentation

◆ rsc_sparse_vector_u32

◆ sparse_vector_u32

Definition at line 203 of file xsample03.cpp.

◆ vector_pairs

typedef std::vector<std::pair<unsigned, unsigned> > vector_pairs
Examples
xsample03.cpp.

Definition at line 205 of file xsample03.cpp.

Function Documentation

◆ build_vector_pairs()

static void build_vector_pairs ( const sparse_vector_u32 & sv,
vector_pairs & vp )
static

◆ generate_random_subset()

static void generate_random_subset ( const sparse_vector_u32 & sv,
std::vector< unsigned > & vect,
unsigned count )
static

◆ load_snp_report()

static int load_snp_report ( const std::string & fname,
sparse_vector_u32 & sv )
static

◆ main()

◆ parse_args()

static int parse_args ( int argc,
char * argv[] )
static

◆ run_benchmark()

◆ search_vector_pairs()

static bool search_vector_pairs ( const vector_pairs & vp,
unsigned rs_id,
unsigned & pos )
static
Examples
xsample03.cpp.

Definition at line 329 of file xsample03.cpp.

Referenced by run_benchmark().

◆ show_help()

static void show_help ( )
static
Examples
xsample03.cpp, xsample04.cpp, xsample04a.cpp, and xsample05.cpp.

Definition at line 80 of file xsample03.cpp.

Referenced by main(), and parse_args().

Variable Documentation

◆ is_bench

bool is_bench = false
Examples
xsample03.cpp, xsample04.cpp, and xsample05.cpp.

Definition at line 106 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ is_diag

bool is_diag = false
Examples
xsample03.cpp, xsample04.cpp, and xsample05.cpp.

Definition at line 104 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ is_timing

bool is_timing = false
Examples
xsample03.cpp, xsample04.cpp, xsample04a.cpp, and xsample05.cpp.

Definition at line 105 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ isnp_name

std::string isnp_name
Examples
xsample03.cpp.

Definition at line 103 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ rsc_in_name

std::string rsc_in_name
Examples
xsample03.cpp.

Definition at line 102 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ rsc_out_name

std::string rsc_out_name
Examples
xsample03.cpp.

Definition at line 100 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ sv_in_name

std::string sv_in_name
Examples
xsample03.cpp, and xsample05.cpp.

Definition at line 101 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ sv_out_name

std::string sv_out_name
Examples
xsample03.cpp, and xsample05.cpp.

Definition at line 99 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ timing_map

Definition at line 209 of file xsample03.cpp.