libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
filterlowintensitysignalremoval.cpp
Go to the documentation of this file.
1/* BEGIN software license
2 *
3 * msXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2021 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the msXpertSuite project.
10 *
11 * The msXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34#include <qmath.h>
35
36#include <limits>
37#include <iterator>
38
39#include <QVector>
40#include <QDebug>
41
43
46
47
48namespace pappso
49{
50
51
53 double mean, double std_dev, double threshold)
54{
55 m_noiseMean = mean;
56 m_noiseStdDev = std_dev;
57 m_threshold = threshold;
58}
59
60
66
67
75
76
80
81
85{
86 if(&other == this)
87 return *this;
88
92
93 return *this;
94}
95
96
97void
99 const QString &parameters)
100{
101 // Typical string: "FloorAmplitudePercentage|15"
102 if(parameters.startsWith(QString("%1|").arg(name())))
103 {
104 QStringList params = parameters.split("|").back().split(";");
105
106 m_noiseMean = params.at(0).toDouble();
107 m_noiseStdDev = params.at(1).toDouble();
108 m_threshold = params.at(2).toDouble();
109 }
110 else
111 {
113 QString(
114 "Building of FilterLowIntensitySignalRemoval from string %1 failed")
115 .arg(parameters));
116 }
117}
118
119
120std::size_t
122{
123 // We want to iterate in the trace and check if we can get the apicess
124 // isolated one by one. An apex is nothing more than the top cusp of a
125 // triangle. We only store apices that have a value > m_threshold.
126
127 m_clusters.clear();
128
129 std::size_t trace_size = trace.size();
130 if(trace_size <= 2)
131 {
132 // qDebug() << "The original trace has less than 3 points. Returning it "
133 //"without modification.";
134 return m_clusters.size();
135 }
136 // else
137 // qDebug() << "The original trace has" << trace_size << "data points";
138
139 // Seed the system with the first point of the trace.
140 m_curIter = trace.cbegin();
141 m_prevIter = trace.cbegin();
142
143 // qDebug() << "First trace point: "
144 //<< "(" << m_prevIter->x << "," << m_prevIter->y << ");";
145
146 // We still have not found any apex!
147 m_prevApex = trace.cend();
148
149 // We will need to know if we were ascending to an apex.
150 bool was_ascending_to_apex = false;
151
152 // Prepare a first apex cluster.
153 ApicesSPtr cluster_apices = std::make_shared<ClusterApices>();
154
155 // Now that we have seeded the system with the first point of the original
156 // trace, go to the next one.
157 ++m_curIter;
158
159 // And now iterate in the original trace and make sure we detect all the
160 // apicess.
161
162 while(m_curIter != trace.cend())
163 {
164 // qDebug() << "Current trace point: "
165 //<< "(" << m_curIter->x << "," << m_curIter->y << ");";
166
167 // We monitor if we are going up or down a peak.
168
169 if(m_curIter->y > m_prevIter->y)
170 // We are ascending a peak. We do not know if we are at the apex.
171 {
172 // qDebug().noquote() << "We are ascending to an apex.\n";
173 was_ascending_to_apex = true;
174 }
175 else
176 // We are descending a peak.
177 {
178 // qDebug().noquote() << "Descending a peak. ";
179
180 // There are two situations:
181 //
182 // 1. Either we were ascending to an apex, and m_prev is that apex,
183 //
184 // 2. Or we were not ascending to an apex and in fact all we are doing
185 // is going down an apex that occurred more than one trace point ago.
186
187 if(!was_ascending_to_apex)
188 // We were not ascending to an apex.
189 {
190 // Do nothing here.
191
192 // qDebug().noquote()
193 //<< "But, we were not ascending to an apex, so do nothing.\n";
194 }
195 else
196 // We are effectively descending a peak right after the apex was
197 // reached at the previous iteration.
198 {
200
201 // qDebug().noquote()
202 //<< "And, we were ascending to an apex, so "
203 //"m_curApex has become m_prevIter: ("
204 //<< m_curApex->x << "," << m_curApex->y << ")\n";
205
206 // We might have two situations:
207
208 // 1. We had already encountered an apex.
209 // 2. We had not yet encountered an apex.
210
211 if(m_prevApex != trace.cend())
212 // We had already encountered an apex.
213 {
214 // Was that apex far on the left of the current apex ?
215
216 if(m_curApex->x - m_prevApex->x >
218 // The distance is not compatible with both apices to belong
219 // to the same apices cluster.
220 {
221 // We are creating a new isotopic apices.
222
223 // But, since another apex had been encountered already,
224 // that means that an isotopic apices was cooking
225 // already. We must store it.
226
227 if(!cluster_apices->size())
228 qFatal("Cannot be that the apices has no apex.");
229
230 // Store the crafted apices cluster.
231 m_clusters.push_back(cluster_apices);
232
233 // qDebug().noquote()
234 //<< "There was a previous apex already, BUT "
235 //"outside of apices range. "
236 //"Pushing the cooking apices that has size:"
237 //<< cluster_apices->size();
238
239 // Now create a brand new apices for later work.
240 cluster_apices = std::make_shared<ClusterApices>();
241
242 // qDebug() << "Created a brand new apices.";
243
244 // We only start the new apices with the current apex if
245 // that apex is above the threshold.
246
247 if(m_curApex->y > m_threshold)
248 {
249 // And start it with the current apex.
250 cluster_apices->push_back(m_curApex);
251
252 // qDebug()
253 //<< "Since the current apex is above the threshold, "
254 //"we PUSH it to the newly created apices: ("
255 //<< m_curApex->x << "," << m_curApex->y << ")";
256
258
259 // qDebug() << "Set prev apex to be cur apex.";
260 }
261 else
262 {
263 // qDebug()
264 //<< "Since the current apex is below the threshold, "
265 //"we DO NOT push it to the newly created "
266 //"apices: ("
267 //<< m_curApex->x << "," << m_curApex->y << ")";
268
269 // Since previous apex went to the closed apices, we
270 // need to reset it.
271
272 m_prevApex = trace.cend();
273
274 // qDebug() << "Since the previous apex went to the "
275 //"closed apices, and cur apex has too "
276 //"small an intensity, we reset prev apex "
277 //"to trace.cend().";
278 }
279 }
280 else
281 // The distance is compatible with both apices to belong to
282 // the same isotopic apices.
283 {
284
285 // But we only push back the current apex to the apices
286 // if its intensity is above the threshold.
287
288 if(m_curApex->y > m_threshold)
289 // The current apex was above the threshold
290 {
291 cluster_apices->push_back(m_curApex);
292
293 // qDebug().noquote()
294 //<< "There was an apex already inside of apices "
295 //"range. "
296 //"AND, since the current apex was above the "
297 //"threshold, we indeed push it to apices.\n";
298
299 // qDebug().noquote()
300 //<< "Current apex PUSHED: " << m_curApex->x << ", "
301 //<< m_curApex->y;
302
304
305 // qDebug() << "We set prev apex to be cur apex.";
306 }
307 else
308 {
309 // qDebug().noquote()
310 //<< "There was an apex already inside of apices "
311 //"range. "
312 //"BUT, since the current apex was below the "
313 //"threshold, we do not push it to apices.\n";
314
315 // qDebug().noquote()
316 //<< "Current apex NOT pushed: " << m_curApex->x
317 //<< ", " << m_curApex->y;
318 }
319 }
320 }
321 else
322 // No apex was previously found. We are fillin-up a new isotopic
323 // apices.
324 {
325 if(m_curApex->y > m_threshold)
326 // We can actually add that apex to a new isotopic apices.
327 {
328 if(cluster_apices->size())
329 qCritical(
330 "At this point, the apices should be new and "
331 "empty.");
332
333 cluster_apices->push_back(m_curApex);
334
335 // qDebug().noquote()
336 //<< "No previous apex was found. Since current apex' "
337 //"intensity is above threshold, we push it back to "
338 //"the "
339 //"apices.\n";
340
341 // Store current apex as previous apex for next rounds.
343
344 // qDebug().noquote() << "And thus we store the current "
345 //"apex as previous apex.\n";
346 }
347 else
348 {
349 // qDebug().noquote()
350 //<< "No previous apex was found. Since current apex' "
351 //"intensity is below threshold, we do nothing.\n";
352 }
353 }
354 }
355 // End of
356 // ! if(!was_ascending_to_apex)
357 // That is, we were ascending to an apex.
358
359 // Tell what it is: we were not ascending to an apex.
360 was_ascending_to_apex = false;
361 }
362 // End of
363 // ! if(m_curIter->y > m_prevIter->y)
364 // That is, we are descending a peak.
365
366 // At this point, prepare next round.
367
368 // qDebug().noquote()
369 //<< "Preparing next round, with m_prevIter = m_curIter and ++index.\n";
370
372
373 ++m_curIter;
374 }
375 // End of
376 // while(index < trace_size)
377
378 // At this point, if a apices had been cooking, add it.
379
380 if(cluster_apices->size())
381 m_clusters.push_back(cluster_apices);
382
383 return m_clusters.size();
384}
385
386
387Trace::const_iterator
389 Trace::const_iterator iter,
390 double distance_threshold)
391{
392 // We receive an iterator that points to an apex. We want iterate back in the
393 // trace working copy and look if there are apices that are distant less than
394 // 1.1~Th.
395
396 Trace::const_iterator init_iter = iter;
397
398 if(iter == trace.cbegin())
399 return iter;
400
401 // Seed the previous iter to iter, because we'll move from there right away.
402 Trace::const_iterator prev_iter = init_iter;
403
404 Trace::const_iterator last_apex_iter = init_iter;
405
406 // We will need to know if we were ascending to an apex.
407 bool was_ascending_to_apex = false;
408
409 // Now that we have seeded the system, we can move iter one data point:
410 --iter;
411
412 while(iter != trace.cbegin())
413 {
414 // If we are already outside of distance_threshold, return the last apex
415 // that was found (or initial iter if none was encountered)
416
417 if(abs(init_iter->x - iter->x) >= distance_threshold)
418 return last_apex_iter;
419
420 if(iter->y > prev_iter->y)
421 {
422 // New data point has greater intensity. Just record that fact.
423 was_ascending_to_apex = true;
424 }
425 else
426 {
427 // New data point has smaller intensity. We are descending a peak.
428
429 if(was_ascending_to_apex)
430 {
431 // We had an apex at previous iter. We are inside the distance
432 // threshold. That is good. But we still could find another apex
433 // while moving along the trace that is in the distance threshold.
434 // This is why we keep going, but we store the previous iter as
435 // the last encountered apex.
436
437 last_apex_iter = prev_iter;
438 }
439 }
440
441 prev_iter = iter;
442
443 // Move.
444 --iter;
445 }
446
447 // qDebug() << "Init m/z: " << init_iter->x
448 //<< "Returning m/z: " << last_apex_iter->x
449 //<< "at distance:" << std::distance(last_apex_iter, init_iter);
450
451 // At this point last_apex_iter is the same as the initial iter.
452 return last_apex_iter;
453}
454
455
456Trace::const_iterator
458 Trace::const_iterator iter,
459 double distance_threshold)
460{
461 // We receive an iterator that points to an apex. We want iterate back in the
462 // trace working copy and look if there are apices that are distant less than
463 // 1.1~Th.
464
465 Trace::const_iterator init_iter = iter;
466
467 if(iter == trace.cend())
468 return iter;
469
470 // Seed the previous iter to iter, because we'll move from there right away.
471 Trace::const_iterator prev_iter = init_iter;
472
473 Trace::const_iterator last_apex_iter = init_iter;
474
475 // We will need to know if we were ascending to an apex.
476 bool was_ascending_to_apex = false;
477
478 // Now that we have seeded the system, we can move iter one data point:
479 ++iter;
480
481 while(iter != trace.cend())
482 {
483 // If we are already outside of distance_threshold, return the last apex
484 // that was found (or initial iter if none was encountered)
485
486 // FIXME: maybe we should compare prev_iter->x with iter->x so that we
487 // continue moving if all the succcessive apices found are each one in the
488 // distance_threshold from the previous one ?
489
490 if(abs(init_iter->x - iter->x) >= distance_threshold)
491 return last_apex_iter;
492
493 if(iter->y > prev_iter->y)
494 {
495 // New data point has greater intensity. Just record that fact.
496 was_ascending_to_apex = true;
497 }
498 else
499 {
500 // New data point has smaller intensity. We are descending a peak.
501
502 if(was_ascending_to_apex)
503 {
504 // We had an apex at previous iter. We are inside the distance
505 // threshold. That is good. But we still could find another apex
506 // while moving along the trace that is in the distance threshold.
507 // This is why we keep going, but we store the previous iter as
508 // the last encountered apex.
509
510 last_apex_iter = prev_iter;
511 }
512 }
513
514 prev_iter = iter;
515
516 // Move.
517 ++iter;
518 }
519
520 // At this point last_apex_iter is the same as the initial iter.
521 return last_apex_iter;
522}
523
524
525Trace
527{
528 // We start from the vector of apices and try to remake a high resolution
529 // trace out of these apices.
530
531 MapTrace map_trace;
532
533 // The general idea is that apices were detected, and only apices having
534 // their intensity above the threshold were stored. That means that we need to
535 // add points to the trace to reconstruct a meaningful trace. Indeed, imagine
536 // a heavy peptide isotopic cluster: the first peak's apex is below threshold
537 // and was not stored. The second peak is also below. But the third isotopic
538 // cluster peak is above and was stored.
539 //
540 // How do we reconstruct the trace to have all these points that were
541 // preceding the first isotopic cluster apex that was detected.
542 //
543 // The same applies to the last peaks of the cluster that are below the
544 // threshold whil the preceeding ones were above!
545
546 // The general idea is to iterate in the vector of apices and for each apex
547 // that is encountered, ask if there were apices
548
549 // m_clusters is a vector that contains vectors of Trace::const_iter.
550 // Each vector in m_clusters should represent all the apices of a cluster.
551
552 // qDebug() << "Reconstructing trace with " << m_clusters.size() <<
553 // "clusters.";
554
555 Trace::const_iterator left_begin_iter = trace.cend();
556 Trace::const_iterator right_end_iter = trace.cend();
557
558 for(auto &&cluster_apices : m_clusters)
559 {
560 // cluster_apices is a vector of Trace::const_iterator. If we want to
561 // reconstruct the Trace, we need to iterate through all the DataPoint
562 // objects in between cluster_apices.begin() and cluster_apices.end().
563
564 Trace::const_iterator begin_iter = *(cluster_apices->begin());
565 Trace::const_iterator end_iter = *(std::prev(cluster_apices->end()));
566
567 // qDebug() << "Iterating in a new cluster apices with boundaries:"
568 //<< begin_iter->x << "-" << end_iter->x;
569
570 left_begin_iter =
572
573 // qDebug() << "After backwardFindApex, left_begin_iter points to:"
574 //<< left_begin_iter->toString() << "with distance:"
575 //<< std::distance(left_begin_iter, begin_iter);
576
577 right_end_iter = forwardFindApex(
578 trace, end_iter, 1.5 * INTRA_CLUSTER_INTER_PEAK_DISTANCE);
579
580 for(Trace::const_iterator iter = left_begin_iter; iter != right_end_iter;
581 ++iter)
582 {
583 map_trace[iter->x] = iter->y;
584 }
585
586 // Now reset the left and right iters to intensity 0 to avoid having
587 // disgraceful oblique lines connnecting trace segments.
588
589 map_trace[left_begin_iter->x] = 0;
590 map_trace[std::prev(right_end_iter)->x] = 0;
591 }
592
593 return map_trace.toTrace();
594}
595
596
597Trace &
599{
600 // qDebug();
601
602 // Horrible hack to have a non const filtering process.
603 return const_cast<FilterLowIntensitySignalRemoval *>(this)->nonConstFilter(
604 trace);
605}
606
607
608Trace &
610{
611 // qDebug();
612
613 if(trace.size() <= 2)
614 {
615 // qDebug() << "The original trace has less than 3 points. Returning it "
616 //"without modification.";
617 return trace;
618 }
619 // else
620 // qDebug() << "The original trace had" << trace.size() << "data points";
621
622 std::size_t cluster_count = detectClusterApices(trace);
623
624 // qDebug() << "Number of detected cluster apices: " << cluster_count;
625
626 if(!cluster_count)
627 return trace;
628
629 // At this point we want to work on the apices and reconstruct a full
630 // trace.
631
632 Trace reconstructed_trace = reconstructTrace(trace);
633
634 trace = std::move(reconstructed_trace);
635
636 return trace;
637}
638
639
640double
645
646
647//! Return a string with the textual representation of the configuration data.
648QString
650{
651 return QString("%1|%2").arg(name()).arg(QString::number(m_threshold, 'f', 2));
652}
653
654
655QString
657{
658 return "FilterLowIntensitySignalRemoval";
659}
660
661} // namespace pappso
excetion to use when an item type is not recognized
Redefines the floor intensity of the Trace.
Trace::const_iterator backwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval(double mean, double std_dev, double threshold)
Trace & filter(Trace &data_points) const override
Trace::const_iterator forwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval & operator=(const FilterLowIntensitySignalRemoval &other)
void buildFilterFromString(const QString &strBuildParams) override
build this filter using a string
QString toString() const override
Return a string with the textual representation of the configuration data.
Trace toTrace() const
Definition maptrace.cpp:219
A simple container of DataPoint instances.
Definition trace.h:148
excetion to use when an item type is not recognized (file format, object type...)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39