BitMagic-C++
xsample04.cpp File Reference

Example: DNA substring search. More...

#include <iostream>
#include <sstream>
#include <chrono>
#include <regex>
#include <time.h>
#include <stdio.h>
#include <stdexcept>
#include <memory>
#include <vector>
#include <map>
#include <utility>
#include <algorithm>
#include <unordered_map>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmaggregator.h"
#include "bmdbg.h"
#include "bmtimer.h"
Include dependency graph for xsample04.cpp:

Go to the source code of this file.

Data Structures

class  DNA_FingerprintScanner
 Utility for keeping all DNA fingerprint vectors and searching them using various techniques. More...
 

Typedefs

typedef std::map< std::string, unsigned > freq_map
 
typedef std::vector< std::pair< unsigned, std::string > > dict_vect
 
typedef bm::aggregator< bm::bvector<> > aggregator_type
 
using THitList = vector<unsigned>
 

Functions

static void show_help ()
 
static int parse_args (int argc, char *argv[])
 
static int load_FASTA (const std::string &fname, std::vector< char > &seq_vect)
 
static void generate_kmers (vector< tuple< string, int > > &top_words, vector< tuple< string, int > > &lo_words, const vector< char > &data, size_t N, unsigned word_size)
 Generate the most frequent words of the specified length from the input sequence.
 
static void find_word_2way (vector< char > &data, const char *word, unsigned word_size, THitList &r)
 2-way string matching
 
static void find_words (const vector< char > &data, vector< const char * > words, unsigned word_size, vector< vector< unsigned > > &hits)
 Find all words in one pass (a cache-coherent algorithm; a variation of 2-way string matching adapted for searching a collection of words).
 
static bool hitlist_compare (const THitList &h1, const THitList &h2)
 Check search result match.
 
int main (int argc, char *argv[])
 

Variables

std::string ifa_name
 
bool is_diag = false
 
bool is_timing = false
 
bool is_bench = false
 
bool is_search = false
 
bool h_word_set = true
 
bm::chrono_taker::duration_map_type timing_map
 
static const size_t WORD_SIZE = 28
 

Detailed Description

Example: DNA substring search.

Definition in file xsample04.cpp.

Typedef Documentation

◆ aggregator_type

typedef bm::aggregator<bm::bvector<> > aggregator_type
Examples
xsample04.cpp.

Definition at line 144 of file xsample04.cpp.

◆ dict_vect

typedef std::vector<std::pair<unsigned, std::string> > dict_vect
Examples
xsample04.cpp.

Definition at line 142 of file xsample04.cpp.

◆ freq_map

typedef std::map<std::string, unsigned> freq_map
Examples
xsample04.cpp.

Definition at line 141 of file xsample04.cpp.

◆ THitList

using THitList = vector<unsigned>

Definition at line 362 of file xsample04.cpp.

Function Documentation

◆ find_word_2way()

static void find_word_2way ( vector< char > & data,
const char * word,
unsigned word_size,
THitList & r )
static

2-way string matching

Examples
xsample04.cpp.

Definition at line 421 of file xsample04.cpp.

Referenced by main().

◆ find_words()

static void find_words ( const vector< char > & data,
vector< const char * > words,
unsigned word_size,
vector< vector< unsigned > > & hits )
static

Find all words in one pass (a cache-coherent algorithm; a variation of 2-way string matching adapted for searching a collection of words).

Examples
xsample04.cpp.

Definition at line 451 of file xsample04.cpp.

Referenced by main().

◆ generate_kmers()

static void generate_kmers ( vector< tuple< string, int > > & top_words,
vector< tuple< string, int > > & lo_words,
const vector< char > & data,
size_t N,
unsigned word_size )
static

Generate the most frequent words of the specified length from the input sequence.

Examples
xsample04.cpp.

Definition at line 367 of file xsample04.cpp.

Referenced by main().

◆ hitlist_compare()

static bool hitlist_compare ( const THitList & h1,
const THitList & h2 )
static

Check search result match.

Examples
xsample04.cpp.

Definition at line 490 of file xsample04.cpp.

Referenced by main().

◆ load_FASTA()

static int load_FASTA ( const std::string & fname,
std::vector< char > & seq_vect )
static
Examples
xsample04.cpp, and xsample04a.cpp.

Definition at line 152 of file xsample04.cpp.

References timing_map.

Referenced by main().

◆ main()

int main ( int argc,
char * argv[] )

◆ parse_args()

static int parse_args ( int argc,
char * argv[] )
static

Definition at line 80 of file xsample04.cpp.

References h_word_set, ifa_name, is_bench, is_diag, is_search, is_timing, and show_help().

Referenced by main().

◆ show_help()

static void show_help ( )
static

Definition at line 56 of file xsample04.cpp.

Referenced by main(), and parse_args().

Variable Documentation

◆ h_word_set

bool h_word_set = true
Examples
xsample04.cpp.

Definition at line 77 of file xsample04.cpp.

Referenced by main(), and parse_args().

◆ ifa_name

std::string ifa_name
Examples
xsample04.cpp, and xsample04a.cpp.

Definition at line 72 of file xsample04.cpp.

Referenced by main(), and parse_args().

◆ is_bench

bool is_bench = false

Definition at line 75 of file xsample04.cpp.

Referenced by parse_args().

◆ is_diag

bool is_diag = false

Definition at line 73 of file xsample04.cpp.

Referenced by parse_args().

◆ is_search

bool is_search = false
Examples
xsample04.cpp.

Definition at line 76 of file xsample04.cpp.

Referenced by main(), and parse_args().

◆ is_timing

bool is_timing = false

Definition at line 74 of file xsample04.cpp.

Referenced by main(), and parse_args().

◆ timing_map

bm::chrono_taker::duration_map_type timing_map

Definition at line 148 of file xsample04.cpp.

Referenced by load_FASTA(), and main().

◆ WORD_SIZE

const size_t WORD_SIZE = 28
static
Examples
xsample04.cpp.

Definition at line 361 of file xsample04.cpp.

Referenced by main().