libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::BafAsciiFileReader Class Reference

#include <bafasciifilereader.h>

Inheritance diagram for pappso::BafAsciiFileReader:
pappso::MsFileReader

Public Member Functions

 BafAsciiFileReader (const QString &file_name)
 
virtual ~BafAsciiFileReader ()
 
virtual MsDataFormat getFileFormat () override
 
virtual std::vector< MsRunIdCstSPtr > getMsRunIds (const QString &run_prefix) override
 
MsRunReader * selectMsRunReader (const QString &file_name) const
 

Private Member Functions

virtual bool initialize (std::size_t &line_count)
 
- Private Member Functions inherited from pappso::MsFileReader
 MsFileReader (const QString &file_name)
 
virtual ~MsFileReader ()
 

Additional Inherited Members

- Private Attributes inherited from pappso::MsFileReader
QString m_fileName
 
MsDataFormat m_fileFormat = MsDataFormat::unknown
 

Detailed Description

Definition at line 15 of file bafasciifilereader.h.

Constructor & Destructor Documentation

◆ BafAsciiFileReader()

pappso::BafAsciiFileReader::BafAsciiFileReader ( const QString & file_name)

Definition at line 29 of file bafasciifilereader.cpp.

30 : MsFileReader{file_name}
31{
32 // To avoid initializing multiple times (costly process), we
33 // only initialize when needed, that is, upon getMsRunIds().
34 // initialize();
35}
MsFileReader(const QString &file_name)

◆ ~BafAsciiFileReader()

pappso::BafAsciiFileReader::~BafAsciiFileReader ( )
virtual

Definition at line 38 of file bafasciifilereader.cpp.

39{
40}

Member Function Documentation

◆ getFileFormat()

MsDataFormat pappso::BafAsciiFileReader::getFileFormat ( )
override virtual

Implements pappso::MsFileReader.

Definition at line 263 of file bafasciifilereader.cpp.

264{
265 return m_fileFormat;
266}
MsDataFormat m_fileFormat

References pappso::MsFileReader::m_fileFormat.

Referenced by pappso::MsFileAccessor::getMsRunIds().

◆ getMsRunIds()

std::vector< MsRunIdCstSPtr > pappso::BafAsciiFileReader::getMsRunIds ( const QString & run_prefix)
override virtual

Implements pappso::MsFileReader.

Definition at line 270 of file bafasciifilereader.cpp.

271{
272 std::vector<MsRunIdCstSPtr> ms_run_ids;
273
274 std::size_t ms_data_line_count = 0;
275
276 if(!initialize(ms_data_line_count))
277 return ms_run_ids;
278
279 // Finally create the MsRunId with the file name.
280 MsRunId ms_run_id(m_fileName);
281 ms_run_id.setMsDataFormat(m_fileFormat);
282
283 // We need to set the unambiguous xmlId string.
284 ms_run_id.setXmlId(
285 QString("%1%2").arg(run_prefix).arg(Utils::getLexicalOrderedString(0)));
286
287 // Craft a meaningful sample name because otherwise all the files loaded from
288 // text files will have the same sample name and it will be difficult to
289 // differentiate them.
290 // Orig version:
291 // ms_run_id.setRunId("Single spectrum");
292 // Now the sample name is nothing but the file name without the path.
293
294 QFileInfo file_info(m_fileName);
295
296 // qDebug() << "file name:" << m_fileName;
297
298 QString sample_name = file_info.fileName();
299
300 // qDebug() << "sample name:" << sample_name;
301
302 ms_run_id.setRunId(sample_name);
303
304 // Now set the sample name to the run id:
305
306 ms_run_id.setSampleName(ms_run_id.getRunId());
307
308 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
309 //<< "Current ms_run_id:" << ms_run_id.toString();
310
311 // Finally make a shared pointer out of it and append it to the vector.
312 ms_run_ids.push_back(std::make_shared<MsRunId>(ms_run_id));
313
314 return ms_run_ids;
315}
virtual bool initialize(std::size_t &line_count)
static const QString getLexicalOrderedString(unsigned int num)
Definition utils.cpp:74

References pappso::Utils::getLexicalOrderedString(), pappso::MsRunId::getRunId(), initialize(), pappso::MsFileReader::m_fileFormat, pappso::MsFileReader::m_fileName, pappso::MsRunId::setMsDataFormat(), pappso::MsRunId::setRunId(), pappso::MsRunId::setSampleName(), and pappso::MsRunId::setXmlId().

Referenced by pappso::MsFileAccessor::getMsRunIds().

◆ initialize()

bool pappso::BafAsciiFileReader::initialize ( std::size_t & line_count)
private virtual

Definition at line 43 of file bafasciifilereader.cpp.

44{
45 // Here we just test some of the lines of the file to check that they comply with
46 // the brukerBafAscii format.
47
48 line_count = 0;
49
50 QFile file(m_fileName);
51
52 if(!file.open(QFile::ReadOnly | QFile::Text))
53 {
54 qDebug() << "Failed to open file" << m_fileName;
55
56 return false;
57 }
58
59 // Construct the regular expression pattern, piecemeal...
60
61 // The retention time as the very first value in the line.
62
63 QString regexp_pattern = QString("^(%1)").arg(
65
66 // The ionization mode (positive or negative)
67 regexp_pattern += QString(",([+-])");
68
69 regexp_pattern += QString(",(ESI|MALDI)");
70
71 // The MS level (ms1 for full scan mass spectrum)
72 regexp_pattern += QString(",ms(\\d)");
73
74 // Do not know what this is for.
75 regexp_pattern += QString(",(-)");
76
77 // The type of peak (profile or centroid).
78 regexp_pattern += QString(",(profile|line)");
79
80 // The m/z range of the mass spectrum.
81
82 regexp_pattern +=
83 QString(",(%1-%2)")
86
87 // The count of peaks following this element in the remainder of the line.
88
89 regexp_pattern += QString(",(\\d+)");
90
91 regexp_pattern += QString("(.*$)");
92
93 // qDebug() << "The full regexp_pattern:" << regexp_pattern;
94
95 QRegularExpression line_regexp(regexp_pattern);
96
97 QRegularExpressionMatch regexp_match;
98
99 QString line;
100 bool file_reading_failed = false;
101 bool ok = false;
102
103 // Reading, parsing and checking lines is extremely time consuming.
104 // What we want here is to reduce the time it takes to read all the file's
105 // lines. We could say that we want to parse and check the first
106 // CHECKED_LINES_COUNT lines and then avoid parsing and checking, just go
107 // through the lines. At the end of the file, the number of lines that have
108 // been read is stored in the out parameter line_count.
109 std::size_t iter = 0;
110
111 while(!file.atEnd())
112 {
113 line = file.readLine().trimmed();
114
115 ++iter;
116 // qDebug() << "Read one line more: (not yet checked)" << iter;
117 if(iter > CHECKED_LINES_COUNT)
118 continue;
119
120 if(line.startsWith('#') || line.isEmpty() ||
121 Utils::endOfLineRegExp.match(line).hasMatch())
122 continue;
123
124 // qDebug() << "Current brukerBafAscii format line " << line_count << ": "
125 // << line.left(30) << " ... " << line.right(30);
126
127 regexp_match = line_regexp.match(line);
128
129 if(regexp_match.hasMatch())
130 {
131 // qDebug() << "The match succeeded.";
132
133 regexp_match.captured(1).toDouble(&ok);
134 if(!ok)
135 {
136 qDebug()
137 << "Failed to extract the retention time of the mass spectrum.";
138
139 file_reading_failed = true;
140
141 break;
142 }
143
144 QString ionization_mode = regexp_match.captured(2);
145 QString source_type = regexp_match.captured(3);
146
147 regexp_match.captured(4).toInt(&ok);
148 if(!ok)
149 {
150 qDebug()
151 << "Failed to extract the MS level of the mass spectrum.";
152
153 file_reading_failed = true;
154
155 break;
156 }
157
158 QString peak_shape_type = regexp_match.captured(6);
159
160 QString mz_range = regexp_match.captured(7);
161
162 mz_range.left(mz_range.indexOf("-")).toDouble(&ok);
163 if(!ok)
164 {
165 qDebug() << "Failed to extract the start of the m/z range.";
166
167 file_reading_failed = true;
168
169 break;
170 }
171
172 mz_range.right(mz_range.indexOf("-") + 1).toDouble(&ok);
173 if(!ok)
174 {
175 qDebug() << "Failed to extract the end of the m/z range.";
176
177 file_reading_failed = true;
178
179 break;
180 }
181
182 // qDebug() << qSetRealNumberPrecision(10)
183 // << "mz_range_start: " << mz_range_start
184 // << "mz_range_end: " << mz_range_end;
185
186 int peak_count = regexp_match.captured(8).toInt(&ok);
187 if(!ok)
188 {
189 qDebug() << "Failed to extract the number of peaks in the mass "
190 "spectrum.";
191
192 file_reading_failed = true;
193
194 break;
195 }
196
197 QString peaks = regexp_match.captured(9);
198 QStringList peaks_stringlist = peaks.split(",", Qt::SkipEmptyParts);
199
200 // qDebug() << "The number of peaks:" << peaks_stringlist.size();
201
202 // Sanity check:
203 if(peaks_stringlist.size() != peak_count)
204 {
205 // qDebug() << "The number of peaks in the mass spectrum does not
206 // "
207 // "match the advertised one.";
208
209 file_reading_failed = true;
210
211 break;
212 }
213
214 // qDebug() << "The retention time:" << retention_time
215 // << "the ionization mode: " << ionization_mode
216 // << "the source type: " << source_type
217 // << "MS level is:" << ms_level
218 // << "peak shape type: " << peak_shape_type
219 // << "m/z range: " << mz_range << "peak count: " <<
220 // peak_count
221 // << "and peaks: " << peaks.left(100) << " ... "
222 // << peaks.right(100) << "";
223
224 // If we are here, that means that the read line has conformed
225 // to the format expected.
226 ++line_count;
227 // qDebug() << "Checked one line more:" << line_count;
228 }
229 // End of
230 // if(regexp_match.hasMatch())
231 else
232 {
233 qDebug() << "The match failed.";
234 file_reading_failed = true;
235
236 break;
237 }
238 }
239 // End of
240 // while(!file.atEnd())
241
242 file.close();
243
244 if(!file_reading_failed && line_count >= 1)
245 {
247 return true;
248 }
249
251
252 // qDebug() << "The number of parsed mass spectra: " << line_count;
253
254 // qDebug() << "Detected file format:"
255 // << Utils::msDataFormatAsString(m_fileFormat)
256 // << "with number of spectra: " << line_count;
257
258 return false;
259}
static QRegularExpression unsignedDoubleNumberNoExponentialRegExp
Definition utils.h:53
static QRegularExpression endOfLineRegExp
Regular expression that tracks the end of line in text files.
Definition utils.h:68
@ unknown
unknown format
static const std::size_t CHECKED_LINES_COUNT

References pappso::brukerBafAscii, pappso::CHECKED_LINES_COUNT, pappso::Utils::endOfLineRegExp, line, pappso::MsFileReader::m_fileFormat, pappso::MsFileReader::m_fileName, pappso::unknown, and pappso::Utils::unsignedDoubleNumberNoExponentialRegExp.

Referenced by getMsRunIds().

◆ selectMsRunReader()

MsRunReader * pappso::BafAsciiFileReader::selectMsRunReader ( const QString & file_name) const

The documentation for this class was generated from the following files: