Cassiopee 1.0
Suffix indexer and search tool
Loading...
Searching...
No Matches
Cassiopee.h
1#include <iostream>
2#include <fstream>
3#include <list>
4#include <map>
5
6#include "tree/tree.hh"
7
8#include "CassiopeeConfig.h"
9
10#include <boost/archive/text_oarchive.hpp>
11#include <boost/archive/text_iarchive.hpp>
12#include <boost/archive/binary_oarchive.hpp>
13#include <boost/archive/binary_iarchive.hpp>
14// Provide an implementation of serialize for std::list
15#include <boost/serialization/list.hpp>
16
17using namespace std;
18
19
20
21
/**
 * One search hit: a position plus the edit counters recorded for it.
 *
 * The default constructor is only declared here; it is defined out of line,
 * so the initial values of the fields are not visible from this header.
 */
class Match {
public:

    /**
     * Equality as used by CassieSearch::same_match.
     *
     * Two matches compare equal when they have the same pos and the same
     * combined in + del total. The individual in/del values and subst are
     * NOT compared, so (in=1, del=2) equals (in=2, del=1) at the same pos.
     *
     * \param p match to compare against
     * \return true when pos and in + del agree
     */
    bool operator==(const Match& p) const {
        return pos == p.pos && in + del == p.in + p.del;
    }

    /** Insertion counter (presumably number of insertions in this hit - TODO confirm). */
    int in;

    /** Deletion counter (presumably number of deletions in this hit - TODO confirm). */
    int del;

    /** Substitution counter; not taken into account by operator==. */
    int subst;

    /** Position of the hit (presumably an offset in the indexed sequence - TODO confirm). */
    long pos;

    /** Default constructor, defined outside this header. */
    Match();

};
55
65class TreeNode {
66public:
70 char c;
71
72
76 list<long> positions;
77
84
89
90
96 TreeNode(char nc);
97
104 TreeNode(char nc, long pos);
105 TreeNode();
106
107
108private:
109 friend class boost::serialization::access;
110 template<class Archive>
111 void serialize(Archive & ar, const unsigned int /*version*/)
112 {
113 ar & c;
114 ar & next_pos;
115 ar & next_length;
116 ar & positions;
117 }
118
119};
120
121
122
// NOTE(review): this include guard opens after class Match and class TreeNode
// have already been declared and closes right after the commented-out
// operator<< below, so the only thing it encloses is dead code. It does not
// protect any declaration of this header against double inclusion; the
// #ifndef/#define pair belongs at the very top of the header and the #endif at
// the very bottom.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation; a name such as CASSIOPEE_H would be safer.
123#ifndef __CASSIOPEE_H_
124#define __CASSIOPEE_H_
125//inline std::ostream& operator<<(std::ostream &strm, const TreeNode &a) {
126// return strm << "TreeNode(" << a.c << ")";
127//}
128#endif
129
130
// Body of the indexer class whose constructor is CassieIndexer(const char*).
// NOTE(review): the class-head line is absent from this listing, and the
// cross-reference index at the end of the page names further members that do
// not appear here either (bool do_reduction, long max_depth,
// long max_index_depth, bool index_loaded_from_file()). Treat this span as an
// incomplete view of the class.
137public:
138
139
// Constructor taking a C string named `path` (presumably the path of the
// sequence file to index - TODO confirm).
145 CassieIndexer(const char* path);
146
148
// save() / load(): no parameters and no return value; presumably persist and
// restore the index (the header pulls in Boost archive headers) - TODO confirm
// the file name convention in cassiopee.cxx.
152 void save();
153
157 void load();
158
159
// graph() overloads: with no argument, or with an int `depth`; what is
// produced is not visible from this header.
163 void graph();
164
170 void graph(int depth);
171
// Returns a std::string for position `pos`.
178 string getSuffix(long pos);
179
// Takes no argument and returns nothing; presumably builds the tree - TODO confirm.
183 void index();
184
// Returns a raw, non-const pointer to a tree<TreeNode> (presumably the private
// member `tr` declared below - TODO confirm).
188 tree<TreeNode>* getTree();
189
// Public list of Match objects held by value.
193 list<Match> matches;
194
199
// Returns a single char for position `pos`.
203 char getCharAtSuffix(long pos);
204
210 void filltree(long pos);
211
216
222
// Public long member (presumably the length of the loaded sequence - TODO confirm).
223 long seq_length;
224
229
230private:
// List of TreeNode values (presumably a flattened form of the tree used by
// save()/load() - TODO confirm).
231 list<TreeNode> serialized_nodes;
232
233 bool loaded_from_file;
234
// NOTE(review): `filename` is a non-owning-looking const char*; whether the
// pointed-to string must outlive this object is not visible here.
235 const char* filename;
236 ifstream seqstream;
237 tree<TreeNode> tr;
238
// const data member: it has to be initialised in the constructor's
// initialiser list, and it makes the class non-assignable by default.
239 const long MAX_SUFFIX;
240 long suffix_position;
// NOTE(review): raw char* buffer; allocation and release are not visible in
// this header - confirm ownership in cassiopee.cxx.
241 char* suffix;
242
// Helper taking a tree iterator, a counter, an output file stream and a
// maximum depth; returns a long (presumably the updated counter - TODO confirm).
246 long graphNode(tree<TreeNode>::iterator node, long counter, ofstream& myfile, int maxdepth);
247
// Returns a raw char* for position `pos` (presumably pointing into `suffix` - TODO confirm).
251 char* loadSuffix(long pos);
252
253
257 void reset_suffix();
258
259
260
// Two overloads: one starting from an explicit tree iterator `sib`, one
// without it.
268 void fillTreeWithSuffix(tree<TreeNode>::iterator sib, long suffix_pos, long pos);
269 void fillTreeWithSuffix(long suffix_pos, long pos);
270
271
272
273};
274
275
// NOTE(review): the class-head line of this utility class is absent from this
// listing; only its public section is visible.
280public:
// Static helper taking two std::string arguments by value (`in`, `out`);
// presumably input and output file paths - TODO confirm in cassiopee.cxx.
284 static void transform_fasta(const string in, const string out);
285};
286
// NOTE(review): the class-head line of this class is absent from this listing.
291public:
// Static comparison of two characters; the only public entry point visible here.
299 static bool isequal(char a, char b);
300private:
// Compares `a` against a char array `b` of `len` entries (presumably true when
// `a` is one of them - TODO confirm).
309 static bool ismatchequal(char a, const char b[], int len);
// Static char tables, declared here and defined elsewhere. The prefixes
// (K, M, R, Y, S, W, B, V, H, D, N) look like IUPAC nucleotide ambiguity
// codes - presumably each table lists the bases its code stands for; confirm
// against the definitions.
310 static const char K_MATCH[];
311 static const char M_MATCH[];
312 static const char R_MATCH[];
313 static const char Y_MATCH[];
314 static const char S_MATCH[];
315 static const char W_MATCH[];
316 static const char B_MATCH[];
317 static const char V_MATCH[];
318 static const char H_MATCH[];
319 static const char D_MATCH[];
320 static const char N_MATCH[];
321};
322
323
// Comparator functor body (the struct-head line is absent from this listing).
// operator() is a strict "less than" on Match::pos, so using it as an ordering
// predicate yields ascending positions. Both pointers are dereferenced without
// a null check.
325{
326 inline bool operator() (const Match* struct1, const Match* struct2)
327 {
328 return (struct1->pos < struct2->pos);
329 }
330};
331
337
// Body of the search class whose constructor is CassieSearch(CassieIndexer*).
// NOTE(review): the class-head line is absent from this listing, and the
// cross-reference index at the end of the page names further members that do
// not appear here either (Match* match_limits, bool ambiguity, int max_indel,
// int max_subst). Treat this span as an incomplete view of the class.
338public:
// Takes a raw pointer to the indexer (presumably stored in the private member
// `indexer` below; ownership is not visible here).
344 CassieSearch(CassieIndexer* index_ref);
345
347
351 void removeDuplicates();
352
// String-to-string map; its role is not visible from this header.
353 map<std::string, string> morphisms;
354
359
// NOTE(review): list of raw Match pointers; who allocates and frees the
// pointed-to objects is not visible here.
363 list<Match*> matches;
364
370
374 int nmax;
375
383 int mode;
384
// search() overloads: a single pattern with a `clear` flag, a single pattern,
// or an array of patterns.
391 void search(string suffix, bool clear);
392
393
400 void search(string suffix);
401
// NOTE(review): `string suffixes[]` decays to a pointer and carries no length;
// the element-count convention is not visible from this header.
407 void search(string suffixes[]);
408
412 bool isequal(char a,char b);
413
414
423
424 long pattern_length;
425
429 void sort();
430
431private:
432
// Equality predicate on pointers: compares the pointed-to objects with
// Match::operator== (same pos and same in + del). Both pointers are
// dereferenced without a null check.
436 static bool same_match (Match* first, Match* second)
437 { return ( *first == *second ); }
438
439 CassieIndexer* indexer;
440
449 void getMatchesFromNode(tree<TreeNode>::iterator sib, const int nbSubst, const int nbIn, const int nbDel);
450
451
452
// Two searchAtNode overloads; the second adds an explicit `start_node`.
// `suffix` is passed by value in both.
464 void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, int nbSubst, int nbIn, int nbDel, int nbN);
465
477 void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, const tree<TreeNode>::iterator start_node, int nbSubst, int nbIn, int nbDel, int nbN);
478
483 bool searchAtreduction(const string suffix, const tree<TreeNode>::iterator sib, long counter, long tree_reducted_pos, int nbSubst, int nbIn, int nbDel, int nbN);
484
485
486};
Definition Cassiopee.h:290
static bool isequal(char a, char b)
Definition cassiopee.cxx:77
Definition Cassiopee.h:136
void graph()
Definition cassiopee.cxx:574
bool do_reduction
Definition Cassiopee.h:198
tree< TreeNode > * getTree()
Definition cassiopee.cxx:836
long max_index_depth
Definition Cassiopee.h:221
void filltree(long pos)
Definition cassiopee.cxx:840
CassieIndexer(const char *path)
Definition cassiopee.cxx:504
long max_depth
Definition Cassiopee.h:215
void index()
Definition cassiopee.cxx:621
char getCharAtSuffix(long pos)
Definition cassiopee.cxx:525
void save()
Definition cassiopee.cxx:666
bool index_loaded_from_file()
Definition cassiopee.cxx:660
list< Match > matches
Definition Cassiopee.h:193
void load()
Definition cassiopee.cxx:710
string getSuffix(long pos)
Definition cassiopee.cxx:748
Definition Cassiopee.h:336
void sort()
Definition cassiopee.cxx:197
int nmax
Definition Cassiopee.h:374
Match * match_limits
Definition Cassiopee.h:358
bool isequal(char a, char b)
Definition cassiopee.cxx:127
int max_indel
Definition Cassiopee.h:418
void search(string suffix, bool clear)
Definition cassiopee.cxx:216
bool ambiguity
Definition Cassiopee.h:369
void removeDuplicates()
Definition cassiopee.cxx:294
list< Match * > matches
Definition Cassiopee.h:363
int mode
Definition Cassiopee.h:383
CassieSearch(CassieIndexer *index_ref)
Definition cassiopee.cxx:117
int max_subst
Definition Cassiopee.h:422
Definition Cassiopee.h:279
static void transform_fasta(const string in, const string out)
Definition cassiopee.cxx:16
Definition Cassiopee.h:25
bool operator==(const Match &p) const
Definition Cassiopee.h:31
int subst
Definition Cassiopee.h:46
int del
Definition Cassiopee.h:42
int in
Definition Cassiopee.h:38
Definition Cassiopee.h:65
char c
Definition Cassiopee.h:70
long next_length
Definition Cassiopee.h:88
list< long > positions
Definition Cassiopee.h:76
long next_pos
Definition Cassiopee.h:83
Definition Cassiopee.h:325