libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
fastafileindexer.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/fasta/fastafileindexer.cpp
3 * \date 22/06/2019
4 * \author Olivier Langella
5 * \brief Quick random access to sequences in a fasta file using an index
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "fastafileindexer.h"
29
30#include <QDebug>
31#include <QTextStream>
32#include <QDataStream>
33#include <QFileInfo>
35#include "fastareader.h"
36
37namespace pappso
38{
39FastaFileIndexer::FastaFileIndexer(const QFileInfo &fastaFile)
40 : m_fasta_file(fastaFile.absoluteFilePath())
41{
42
43 if(m_fasta_file.fileName().isEmpty())
44 {
45 throw PappsoException(QObject::tr("No FASTA file name specified"));
46 }
47 if(m_fasta_file.open(QIODevice::ReadOnly))
48 {
49 parseFastaFile();
50 m_fasta_file.close();
51 }
52 else
53 {
54 throw PappsoException(QObject::tr("ERROR opening FASTA file %1 for read")
55 .arg(fastaFile.fileName()));
56 }
57}
58
// Constructor fragment that builds its file member from the file name held by
// `other`.
// NOTE(review): the signature line and one body line are absent from this
// listing; presumably this is the copy constructor - confirm against the
// original source.
60 : m_fasta_file(other.m_fasta_file.fileName())
61{
62
// The new object starts without a text stream: the pointer is reset rather
// than taken from `other`.
64 mpa_sequenceTxtIn = nullptr;
65}
70
71
72void
74{
75
76 qDebug();
77 QDataStream bin_in(&m_fasta_file);
78 qint64 position = 0;
79
80 // QChar first_char;
81 // txt_in >> first_char;
82 qint8 char_in;
83 bin_in >> char_in;
84 while(!bin_in.atEnd() && (char_in < (qint8)21))
85 { // eat Windows \r\n
86 position++;
87 bin_in >> char_in;
88 }
89 while(!bin_in.atEnd())
90 {
91 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
92 // << " first_char=" << first_char;
93 if(char_in == (qint8)'>')
94 {
95
96 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
97 // << " index=" << m_indexArray.size()
98 // << " position=" << position;
99 m_indexArray.push_back(position);
100 }
101 // eat line
102 position++;
103 bin_in >> char_in;
104 while(!bin_in.atEnd() && (char_in > (qint8)20))
105 {
106 position++;
107 bin_in >> char_in;
108 }
109 position++;
110 bin_in >> char_in;
111
112 if(!bin_in.atEnd() && (char_in < (qint8)21))
113 { // eat Windows \r\n
114 position++;
115 bin_in >> char_in;
116 }
117 }
118 qDebug();
119}
120
121void
123{
124 if(mpa_sequenceTxtIn != nullptr)
125 return;
126 if(m_fasta_file.open(QIODevice::ReadOnly))
127 {
128 mpa_sequenceTxtIn = new QTextStream(&m_fasta_file);
129 }
130 else
131 {
132 throw PappsoException(QObject::tr("ERROR opening FASTA file %1 for read")
133 .arg(m_fasta_file.fileName()));
134 }
135}
136
// Releases the text stream, if one exists, and closes the FASTA file.
// Does nothing when no stream is currently allocated.
// NOTE(review): the signature line is absent from this listing; presumably
// this is FastaFileIndexer::close() - confirm against the original source.
137void
139{
140 if(mpa_sequenceTxtIn != nullptr)
141 {
// delete the stream and reset the pointer before closing the file it reads
142 delete mpa_sequenceTxtIn;
143 mpa_sequenceTxtIn = nullptr;
144 m_fasta_file.close();
145 }
146}
147
148void
150 std::size_t index)
151{
152 open();
153
154 qDebug() << " goto=" << index << " pos=" << m_indexArray[index];
155 bool seek_ok;
156 if((index < m_indexArray.size()) &&
157 (seek_ok = mpa_sequenceTxtIn->seek(m_indexArray[index])))
158 {
159
160 qDebug() << " realpos=" << mpa_sequenceTxtIn->pos();
161 ;
162 if(!seek_ok)
163 {
164
165 throw PappsoException(QObject::tr("ERROR FastaFileIndexer : seek to "
166 "sequence %1, position %2 failed")
167 .arg(index)
168 .arg(m_indexArray[index]));
169 }
170 FastaReader reader(fasta_handler);
172 }
173 else
174 {
176 QObject::tr("ERROR reading FASTA file %1 : sequence index %2 "
177 "unreachable, array size=%3")
178 .arg(m_fasta_file.fileName())
179 .arg(index)
180 .arg(m_indexArray.size()));
181 }
182}
183
184
187{
188
189 return std::make_shared<FastaFileIndexer>(*this);
190}
191
192
// Returns the number of offsets currently stored in m_indexArray.
// NOTE(review): the signature line is absent from this listing; presumably
// this is FastaFileIndexer::size() const - confirm against the original
// source.
193std::size_t
195{
196 return m_indexArray.size();
197}
198} // namespace pappso
FastaFileIndexer(const QFileInfo &fastaFile)
FastaFileIndexerSPtr makeFastaFileIndexerSPtr() const
std::vector< qint64 > m_indexArray
void getSequenceByIndex(FastaHandlerInterface &fasta_handler, std::size_t index) override
void parseOnlyOne(QTextStream &p_in)
Quick random access to sequences in a fasta file using an index.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< FastaFileIndexer > FastaFileIndexerSPtr