libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msrundatasettree.cpp
Go to the documentation of this file.
1// GPL 3+
2// Filippo Rusconi
3
4#include <map>
5#include <limits>
6#include <iostream>
7#include <iomanip>
8
9#include "msrundatasettree.h"
10
11#include "../pappsoexception.h"
13
14#include <QVariant>
15
16
17namespace pappso
18{
19
20
22 : mcsp_msRunId(ms_run_id_csp)
23{
24}
25
26
28{
29 // qDebug();
30
31 for(auto &&node : m_rootNodes)
32 {
33 // Each node is responsible for freeing its children nodes!
34
35 delete node;
36 }
37
38 m_rootNodes.clear();
39
40 // Beware not to delete the node member of the map, as we have already
41 // destroyed them above!
42 //
43 // for(auto iterator = m_indexNodeMap.begin(); iterator !=
44 // m_indexNodeMap.end();
45 //++iterator)
46 //{
47 // delete(iterator->second);
48 //}
49
50 // qDebug();
51}
52
53
56 QualifiedMassSpectrumCstSPtr mass_spectrum_csp)
57{
58 // qDebug();
59
60 if(mass_spectrum_csp == nullptr)
61 qFatal("Cannot be nullptr");
62
63 if(mass_spectrum_csp.get() == nullptr)
64 qFatal("Cannot be nullptr");
65
66 // We need to get the precursor spectrum index, in case this spectrum is a
67 // fragmentation index.
68
69 MsRunDataSetTreeNode *new_node_p = nullptr;
70
71 std::size_t precursor_spectrum_index =
72 mass_spectrum_csp->getPrecursorSpectrumIndex();
73
74 // qDebug() << "The precursor_spectrum_index:" << precursor_spectrum_index;
75
76 if(precursor_spectrum_index == std::numeric_limits<std::size_t>::max())
77 {
78 // This spectrum is a full scan spectrum, not a fragmentation spectrum.
79 // Create a new node with no parent and push it back to the root nodes
80 // vector.
81
82 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, nullptr);
83
84 // Since there is no parent in this overload, it is assumed that the node
85 // to be populated with the new node is the root node.
86
87 m_rootNodes.push_back(new_node_p);
88
89 // true: with_data
90 // qDebug().noquote() << "Pushed back to the roots node vector node:"
91 //<< new_node_p->toString(true);
92 }
93 else
94 {
95 // This spectrum is a fragmentation spectrum.
96
97 // Sanity check
98
99 if(mass_spectrum_csp->getMsLevel() <= 1)
100 {
102 "msrundatasettree.cpp -- ERROR the MS level needs to be > 1 in a "
103 "fragmentation spectrum.");
104 }
105
106 // Get the node that contains the precursor ion mass spectrum.
107 MsRunDataSetTreeNode *parent_node_p = findNode(precursor_spectrum_index);
108
109 if(parent_node_p == nullptr)
110 {
112 "msrundatasettree.cpp -- ERROR could not find "
113 "a tree node matching the index.");
114 }
115
116 // qDebug() << "Fragmentation spectrum"
117 //<< "Found parent node:" << parent_node_p
118 //<< "for precursor index:" << precursor_spectrum_index;
119
120 // At this point, create a new node with the right parent.
121
122 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_node_p);
123
124 parent_node_p->m_children.push_back(new_node_p);
125 }
126
127 // And now document that addition in the node index map.
128 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
129 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
130
131 // We also want to document the new node relating to the
132 // retention time.
133
135 mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
136
137 // Likewise for the ion mobility time.
138
139 double ion_mobility_value = -1;
140
141 MsDataFormat file_format = mcsp_msRunId->getMsDataFormat();
142 bool ok = false;
143
144 if(file_format == MsDataFormat::brukerTims)
145 {
146 // Start by looking if there is a OneOverK0 valid value, which
147 // means we have effectively handled a genuine mobility scan spectrum.
148 QVariant ion_mobility_variant_value =
149 mass_spectrum_csp->getParameterValue(
151
152 if(ion_mobility_variant_value.isValid())
153 {
154 // Yes, genuine ion mobility scan handled here.
155
156 ion_mobility_value = ion_mobility_variant_value.toDouble(&ok);
157
158 if(!ok)
159 {
160 qFatal(
161 "The data are Bruker timsTOF data but failed to convert valid "
162 "QVariant 1/K0 value to double.");
163 }
164 }
165 else
166 {
167 // We are not handling a genuine single ion mobility scan here.
168 // We must be handling a mass spectrum that correspond to
169 // the combination of all the ion mobility scans for a single
170 // frame. This is when the user asks that ion mobility scans
171 // be flattended. In this case, the OneOverK0 value is not valid
172 // but there are two values that are set:
173 // TimsFrameInvKoBegin and TimsFrameInvKoEnd.
174 // See TimsFramesMsRunReader::readSpectrumCollection2.
175
176 // Test one of these values as a sanity check. But
177 // give the value of -1 for the ion mobility because we do not
178 // have ion mobility data here.
179
180 ion_mobility_variant_value = mass_spectrum_csp->getParameterValue(
182
183 if(!ion_mobility_variant_value.isValid())
184 {
185 qFatal(
186 "The data are Bruker timsTOF data but failed to get correct "
187 "ion mobility data. Inconsistency found.");
188 }
189 }
190 }
191 else
192 {
193 ion_mobility_value = mass_spectrum_csp->getDtInMilliSeconds();
194 }
195
196 if(ion_mobility_value != -1)
197 documentNodeInDtRtMap(ion_mobility_value, new_node_p, DataKind::dt);
198
200
201 // qDebug() << "New index/node map:"
202 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
203 //<< new_node_p;
204
205 return new_node_p;
206}
207
208
209const std::map<std::size_t, MsRunDataSetTreeNode *> &
214
215
216std::size_t
218{
219 // We have a node and we want to get the matching mass spectrum index.
220
221 if(node == nullptr)
222 throw("Cannot be that the node pointer is nullptr");
223
224 std::map<std::size_t, MsRunDataSetTreeNode *>::const_iterator iterator =
225 std::find_if(
226 m_indexNodeMap.begin(),
227 m_indexNodeMap.end(),
228 [node](const std::pair<std::size_t, MsRunDataSetTreeNode *> pair) {
229 return pair.second == node;
230 });
231
232 if(iterator != m_indexNodeMap.end())
233 return iterator->first;
234
235 return std::numeric_limits<std::size_t>::max();
236}
237
238
239std::size_t
241 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp) const
242{
243 MsRunDataSetTreeNode *node_p = findNode(qualified_mass_spectrum_csp);
244
245 return massSpectrumIndex(node_p);
246}
247
248
249const std::vector<MsRunDataSetTreeNode *> &
251{
252 return m_rootNodes;
253}
254
255
256void
258{
259 // qDebug() << "Going to call node->accept(visitor) for each root node.";
260
261 for(auto &&node : m_rootNodes)
262 {
263 // qDebug() << "Calling accept for root node:" << node;
264
265 if(visitor.shouldStop())
266 break;
267
268 node->accept(visitor);
269 }
270}
271
272
273void
276 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_begin_iterator,
277 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_end_iterator)
278{
279 // qDebug() << "Visitor:" << &visitor << "The distance is between iterators
280 // is:"
281 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
282
283 using Iterator = std::vector<MsRunDataSetTreeNode *>::const_iterator;
284
285 Iterator iter = nodes_begin_iterator;
286
287 // Inform the visitor of the number of nodes to work on.
288
289 std::size_t node_count =
290 std::distance(nodes_begin_iterator, nodes_end_iterator);
291
292 visitor.setNodesToProcessCount(node_count);
293
294 while(iter != nodes_end_iterator)
295 {
296 // qDebug() << "Visitor:" << &visitor
297 //<< "The distance is between iterators is:"
298 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
299
300 // qDebug() << "Node visited:" << (*iter)->toString();
301
302 if(visitor.shouldStop())
303 break;
304
305 (*iter)->accept(visitor);
306 ++iter;
307 }
308}
309
310
313{
314 // qDebug();
315
316 for(auto &node : m_rootNodes)
317 {
318 // qDebug() << "In one node of the root nodes.";
319
320 MsRunDataSetTreeNode *iterNode = node->findNode(mass_spectrum_csp);
321 if(iterNode != nullptr)
322 return iterNode;
323 }
324
325 return nullptr;
326}
327
328
330MsRunDataSetTree::findNode(std::size_t spectrum_index) const
331{
332 // qDebug();
333
334 for(auto &node : m_rootNodes)
335 {
336 // qDebug() << "In one node of the root nodes.";
337
338 MsRunDataSetTreeNode *iterNode = node->findNode(spectrum_index);
339 if(iterNode != nullptr)
340 return iterNode;
341 }
342
343 return nullptr;
344}
345
346
347std::vector<MsRunDataSetTreeNode *>
349{
350 // We want to push back all the nodes of the tree in a flat vector of nodes.
351
352 std::vector<MsRunDataSetTreeNode *> nodes;
353
354 for(auto &&node : m_rootNodes)
355 {
356 // The node will store itself and all of its children.
357 node->flattenedView(nodes, true /* with_descendants */);
358 }
359
360 return nodes;
361}
362
363
364std::vector<MsRunDataSetTreeNode *>
366 bool with_descendants)
367{
368 std::vector<MsRunDataSetTreeNode *> nodes;
369
370 // Logically, ms_level cannot be 0.
371
372 if(!ms_level)
373 {
375 "msrundatasettree.cpp -- ERROR the MS level cannot be 0.");
376
377 return nodes;
378 }
379
380 // The depth of the tree at which we are right at this point is 0, we have not
381 // gone into the children yet.
382
383 std::size_t depth = 0;
384
385 // If ms_level is 1, then that means that we want the nodes starting right at
386 // the root nodes with or without the descendants.
387
388 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
389 //<< "ms_level: " << ms_level << " depth: " << depth << std::endl;
390
391 if(ms_level == 1)
392 {
393 for(auto &&node : m_rootNodes)
394 {
395 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__
396 //<< " () "
397 //<< "Handling one of the root nodes at ms_level = 1."
398 //<< std::endl;
399
400 node->flattenedView(nodes, with_descendants);
401 }
402
403 return nodes;
404 }
405
406 // At this point, we know that we want the descendants of the root nodes since
407 // we want ms_level > 1, so we need go to to the children of the root nodes.
408
409 // Let depth to 0, because if we go to the children of the root nodes we will
410 // still be at depth 0, that is MS level 1.
411
412 for(auto &node : m_rootNodes)
413 {
414 // std::cout
415 //<< __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
416 //<< std::setprecision(15)
417 //<< "Requesting a flattened view of the root's child nodes with depth: "
418 //<< depth << std::endl;
419
420 node->flattenedViewMsLevelNodes(ms_level, depth, nodes, with_descendants);
421 }
422
423 return nodes;
424}
425
426
429 std::size_t product_spectrum_index)
430{
431
432 // qDebug();
433
434 // Find the node that holds the mass spectrum that was acquired as the
435 // precursor that when fragmented gave a spectrum at spectrum_index;
436
437 // Get the node that contains the product_spectrum_index first.
438 MsRunDataSetTreeNode *node = nullptr;
439 node = findNode(product_spectrum_index);
440
441 // Now get the node that contains the precursor_spectrum_index.
442
443 return findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex());
444}
445
446
447std::vector<MsRunDataSetTreeNode *>
449 std::size_t precursor_spectrum_index)
450{
451 std::vector<MsRunDataSetTreeNode *> nodes;
452
453 // First get the node of the precursor spectrum index.
454
455 MsRunDataSetTreeNode *precursor_node = findNode(precursor_spectrum_index);
456
457 if(precursor_node == nullptr)
458 return nodes;
459
460 nodes.assign(precursor_node->m_children.begin(),
461 precursor_node->m_children.end());
462
463 return nodes;
464}
465
466
467std::vector<MsRunDataSetTreeNode *>
469 PrecisionPtr precision_ptr)
470{
471
472 // Find all the precursor nodes holding a mass spectrum that contained a
473 // precursor mz-value.
474
475 if(precision_ptr == nullptr)
477 "msrundatasettree.cpp -- ERROR precision_ptr cannot be nullptr.");
478
479 std::vector<MsRunDataSetTreeNode *> product_nodes;
480
481 // As a first step, find all the nodes that hold a mass spectrum that was
482 // acquired as a fragmentation spectrum of an ion of mz, that is, search all
483 // the product ion nodes for which precursor was mz.
484
485 for(auto &&node : m_rootNodes)
486 {
487 node->productNodesByPrecursorMz(mz, precision_ptr, product_nodes);
488 }
489
490 // Now, for each node found get the precursor node
491
492 std::vector<MsRunDataSetTreeNode *> precursor_nodes;
493
494 for(auto &&node : product_nodes)
495 {
496 precursor_nodes.push_back(
497 findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex()));
498 }
499
500 return precursor_nodes;
501}
502
503
504bool
506 MsRunDataSetTreeNode *node_p,
507 DataKind data_kind)
508{
509 // qDebug();
510
511 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
512 using DoubleNodeVectorMap = std::map<double, NodeVector>;
513 using MapPair = std::pair<double, NodeVector>;
514 using MapIterator = DoubleNodeVectorMap::iterator;
515
516 DoubleNodeVectorMap *map_p;
517
518 if(data_kind == DataKind::rt)
519 {
521 }
522 else if(data_kind == DataKind::dt)
523 {
525 }
526 else
527 qFatal("Programming error.");
528
529 // There are two possibilities:
530 //
531 // 1. The time was never encountered yet. We won't find it. We need to
532 // allocate a vector of Node's and set it associated to time in the map.
533 //
534 // 2. The time was encountered already, we will find it in the maps, we'll
535 // just push_back the Node in the vector of nodes.
536
537 MapIterator found_iterator = map_p->find(time);
538
539 if(found_iterator != map_p->end())
540 {
541 // The time value was encountered already.
542
543 found_iterator->second.push_back(node_p);
544
545 // qDebug() << "Found iterator for time:" << time;
546 }
547 else
548 {
549 // We need to create a new vector with the node.
550
551 NodeVector node_vector = {node_p};
552
553 map_p->insert(MapPair(time, node_vector));
554
555 // qDebug() << "Inserted new time:node_vector pair.";
556 }
557
558 return true;
559}
560
561
564 QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
565 MsRunDataSetTreeNode *parent_p)
566{
567 // qDebug();
568
569 // We want to add a mass spectrum. Either the parent_p argument is nullptr or
570 // not. If it is nullptr, then we just append the mass spectrum to the vector
571 // of root nodes. If it is not nullptr, we need to append the mass spectrum to
572 // that node.
573
574 MsRunDataSetTreeNode *new_node_p =
575 new MsRunDataSetTreeNode(mass_spectrum_csp, parent_p);
576
577 if(parent_p == nullptr)
578 {
579 m_rootNodes.push_back(new_node_p);
580
581 // qDebug() << "Pushed back" << new_node << "to root nodes:" <<
582 // &m_rootNodes;
583 }
584 else
585 {
586 parent_p->m_children.push_back(new_node_p);
587
588 // qDebug() << "Pushed back" << new_node << "with parent:" << parent_p;
589 }
590
592
593 // And now document that addition in the node index map.
594 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
595 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
596
597 // We also want to document the new node relating to the
598 // retention time.
599
601 mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
602
603 // Likewise for the ion mobility time.
604
605 double ion_mobility_value = -1;
606
607 MsDataFormat file_format = mcsp_msRunId->getMsDataFormat();
608 bool ok = false;
609
610 if(file_format == MsDataFormat::brukerTims)
611 {
612 // Start by looking if there is a OneOverK0 valid value, which
613 // means we have effectively handled a genuine mobility scan spectrum.
614 QVariant ion_mobility_variant_value =
615 mass_spectrum_csp->getParameterValue(
617
618 if(ion_mobility_variant_value.isValid())
619 {
620 // Yes, genuine ion mobility scan handled here.
621
622 ion_mobility_value = ion_mobility_variant_value.toDouble(&ok);
623
624 if(!ok)
625 {
626 qFatal(
627 "The data are Bruker timsTOF data but failed to convert valid "
628 "QVariant 1/K0 value to double.");
629 }
630 }
631 else
632 {
633 // We are not handling a genuine single ion mobility scan here.
634 // We must be handling a mass spectrum that correspond to
635 // the combination of all the ion mobility scans for a single
636 // frame. This is when the user asks that ion mobility scans
637 // be flattended. In this case, the OneOverK0 value is not valid
638 // but there are two values that are set:
639 // TimsFrameInvKoBegin and TimsFrameInvKoEnd.
640 // See TimsFramesMsRunReader::readSpectrumCollection2.
641
642 // Test one of these values as a sanity check. But
643 // give the value of -1 for the ion mobility because we do not
644 // have ion mobility data here.
645
646 ion_mobility_variant_value = mass_spectrum_csp->getParameterValue(
648
649 if(!ion_mobility_variant_value.isValid())
650 {
651 qFatal(
652 "The data are Bruker timsTOF data but failed to get correct "
653 "ion mobility data. Inconsistency found.");
654 }
655 }
656 }
657 else
658 {
659 ion_mobility_value = mass_spectrum_csp->getDtInMilliSeconds();
660 }
661
662 if(ion_mobility_value != -1)
663 documentNodeInDtRtMap(ion_mobility_value, new_node_p, DataKind::dt);
664
665 // qDebug() << "New index/node map:"
666 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
667 //<< new_node;
668
669 return new_node_p;
670}
671
672
675 QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
676 std::size_t precursor_spectrum_index)
677{
678 // qDebug();
679
680 // First get the node containing the mass spectrum that was acquired at index
681 // precursor_spectrum_index.
682
683 // qDebug() << "Need to find the precursor's mass spectrum node for precursor
684 // "
685 //"spectrum index:"
686 //<< precursor_spectrum_index;
687
688 MsRunDataSetTreeNode *mass_spec_data_node_p =
689 findNode(precursor_spectrum_index);
690
691 // qDebug() << "Found node" << mass_spec_data_node_p
692 //<< "for precursor index:" << precursor_spectrum_index;
693
694 if(mass_spec_data_node_p == nullptr)
695 {
697 "msrundatasettree.cpp -- ERROR could not find a a "
698 "tree node matching the index.");
699 }
700
701 // qDebug() << "Calling addMassSpectrum with parent node:"
702 //<< mass_spec_data_node_p;
703
704 return addMassSpectrum(mass_spectrum_csp, mass_spec_data_node_p);
705}
706
707
708std::size_t
710 double end,
711 NodeVector &nodes,
712 DataKind data_kind) const
713{
714 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
715 using DoubleNodeVectorMap = std::map<double, NodeVector>;
716 using MapIterator = DoubleNodeVectorMap::const_iterator;
717
718 const DoubleNodeVectorMap *map_p;
719
720 if(data_kind == DataKind::rt)
721 {
723 }
724 else if(data_kind == DataKind::dt)
725 {
727 }
728 else
729 qFatal("Programming error.");
730
731 std::size_t added_nodes = 0;
732
733 // Get the iterator to the map item that has the key greater or equal to
734 // start.
735
736 MapIterator start_iterator = map_p->lower_bound(start);
737
738 if(start_iterator == map_p->end())
739 return 0;
740
741 // Now get the end of the map useful range of items.
742
743 MapIterator end_iterator = map_p->upper_bound(end);
744
745 // Now that we have the iterator range, iterate in it and get the mass spectra
746 // from each item's pair.second node vector.
747
748 for(MapIterator iterator = start_iterator; iterator != end_iterator;
749 ++iterator)
750 {
751 // We are iterating in MapPair items.
752
753 NodeVector node_vector = iterator->second;
754
755 // All the nodes in the node vector need to be copied to the mass_spectra
756 // vector passed as parameter.
757
758 for(auto &&node_p : node_vector)
759 {
760 nodes.push_back(node_p);
761
762 ++added_nodes;
763 }
764 }
765
766 return added_nodes;
767}
768
769
770std::size_t
772 double start, double end, NodeVector &nodes, DataKind data_kind) const
773{
774 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
775 using NodeVectorIterator = NodeVector::iterator;
776
777 using DoubleNodeVectorMap = std::map<double, NodeVector>;
778 using MapIterator = DoubleNodeVectorMap::const_iterator;
779
780 const DoubleNodeVectorMap *map_p;
781
782 if(data_kind == DataKind::rt)
783 {
785 }
786 else if(data_kind == DataKind::dt)
787 {
789 }
790 else
791 qFatal("Programming error.");
792
793 std::size_t removed_vector_items = 0;
794
795 // We want to remove from the nodes vector all the nodes that contain a mass
796 // spectrum acquired at a time range outside of [ start-end ], that is, the
797 // time values [begin() - start [ and ]end -- end()[.
798
799 // Get the iterator to the map item that has the key less to
800 // start (we want to keep the map item having key == start).
801
802 MapIterator first_end_iterator = (*map_p).upper_bound(start);
803
804 // Now that we have the first_end_iterator, we can iterate between [begin --
805 // first_end_iterator[
806
807 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
808 ++iterator)
809 {
810 // Remove from the nodes vector the nodes.
811
812 // We are iterating in MapPair items.
813
814 NodeVector node_vector = iterator->second;
815
816 // All the nodes in the node vector need to be removed from the
817 // mass_spectra vector passed as parameter if found.
818
819 for(auto &&node_p : node_vector)
820 {
821 NodeVectorIterator iterator =
822 std::find(nodes.begin(), nodes.end(), node_p);
823
824 if(iterator != nodes.end())
825 {
826 // We found the node: remove it.
827
828 nodes.erase(iterator);
829
830 ++removed_vector_items;
831 }
832 }
833 }
834
835 // Now the second begin iterator, so that we can remove all the items
836 // contained in the second range, that is, ]end--end()[.
837
838 // The second_first_iterator will point to the item having its time value less
839 // or equal to end. But we do not want to get items having their time equal to
840 // end, only < end. So, if the iterator is not begin(), we just need to
841 // decrement it once.
842 MapIterator second_first_iterator = map_p->upper_bound(end);
843 if(second_first_iterator != map_p->begin())
844 --second_first_iterator;
845
846 for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
847 ++iterator)
848 {
849 // We are iterating in MapPair items.
850
851 NodeVector node_vector = iterator->second;
852
853 // All the nodes in the node vector need to be removed from the
854 // mass_spectra vector passed as parameter if found.
855
856 for(auto &&node_p : node_vector)
857 {
858 NodeVectorIterator iterator =
859 std::find(nodes.begin(), nodes.end(), node_p);
860
861 if(iterator != nodes.end())
862 {
863 // We found the node: remove it.
864
865 nodes.erase(iterator);
866
867 ++removed_vector_items;
868 }
869 }
870 }
871
872 return removed_vector_items;
873}
874
875
876std::size_t
878 double start,
879 double end,
880 QualMassSpectraVector &mass_spectra,
881 DataKind data_kind) const
882{
883 // qDebug() << "With start:" << start << "and end:" << end;
884
885 if(start == end)
886 qDebug() << "Special case, start and end are equal:" << start;
887
888 // We will use the maps that relate rt | dt to a vector of data tree nodes.
889 // Indeed, we may have more than one mass spectrum acquired for a given rt, in
890 // case of ion mobility mass spectrometry. Same for dt: we will have as many
891 // spectra for each dt as there are retention time values...
892
893 using DoubleNodeVectorMap = std::map<double, NodeVector>;
894 using MapIterator = DoubleNodeVectorMap::const_iterator;
895
896 const DoubleNodeVectorMap *map_p;
897
898 if(data_kind == DataKind::rt)
899 {
901
902 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
903 // start
904 //<< "end:" << end;
905 }
906 else if(data_kind == DataKind::dt)
907 {
909
910 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
911 // start
912 //<< "end:" << end;
913 }
914 else
915 qFatal("Programming error.");
916
917 // qDebug() << "The rt |dt / mass spectra map has size:" << map_p->size()
918 //<< "The start:" << start << "the end:" << end;
919
920 std::size_t added_mass_spectra = 0;
921
922 // Get the iterator to the map item that has the key greater or equal to
923 // start.
924
925 MapIterator start_iterator = map_p->lower_bound(start);
926
927 if(start_iterator == map_p->end())
928 {
929 qDebug() << "The start iterator is end()!";
930 return 0;
931 }
932
933 // qDebug() << "The start_iterator points to:" << start_iterator->first
934 //<< "as a rt|dt time.";
935
936 // Now get the end of the map's useful range of items.
937
938 // Returns an iterator pointing to the first element in the container whose
939 // key is considered to go after 'end'.
940
941 MapIterator end_iterator = map_p->upper_bound(end);
942
943 // Immediately verify if there is no distance between start and end.
944 if(!std::distance(start_iterator, end_iterator))
945 {
946 qDebug() << "No range of mass spectra could be selected.";
947 return 0;
948 }
949
950 if(end_iterator == map_p->end())
951 {
952 // qDebug() << "The end_iterator points to the end of the map."
953 //<< "The last map item is prev() at key value: "
954 //<< std::prev(end_iterator)->first;
955 }
956 else
957 {
958 // qDebug() << "The end_iterator points to:" << end_iterator->first
959 //<< "as a rt|dt time and the accounted key value is actually"
960 //<< std::prev(end_iterator)->first;
961 }
962
963 // qDebug() << "The number of time values to iterate through:"
964 //<< std::distance(start_iterator, end_iterator)
965 //<< "with values: start: " << start_iterator->first
966 //<< "and end: " << std::prev(end_iterator)->first;
967
968 // Now that we have the iterator range, iterate in it and get the mass
969 // spectra from each item's pair.second node vector.
970
971 for(MapIterator iterator = start_iterator; iterator != end_iterator;
972 ++iterator)
973 {
974 // We are iterating in DoubleNodeVectorMap MapPair items,
975 // with double = rt or dt and NodeVector being just that.
976
977 NodeVector node_vector = iterator->second;
978
979 // All the nodes' mass spectra in the node vector need to be copied to
980 // the mass_spectra vector passed as parameter.
981
982 for(auto &&node_p : node_vector)
983 {
984 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp =
985 node_p->getQualifiedMassSpectrum();
986
987#if 0
988 // Sanity check only for deep debugging.
989
990 if(qualified_mass_spectrum_csp == nullptr ||
991 qualified_mass_spectrum_csp.get() == nullptr)
992 {
994 "The QualifiedMassSpectrumCstSPtr cannot be nullptr.");
995 }
996 else
997 {
998 //qDebug() << "Current mass spectrum is valid with rt:"
999 //<< qualified_mass_spectrum_csp->getRtInMinutes();
1000 }
1001#endif
1002
1003 mass_spectra.push_back(qualified_mass_spectrum_csp);
1004
1005 ++added_mass_spectra;
1006 }
1007 }
1008
1009 // qDebug() << "Returning added_mass_spectra:" << added_mass_spectra;
1010
1011 return added_mass_spectra;
1012}
1013
1014
1015std::size_t
1017 double start,
1018 double end,
1019 QualMassSpectraVector &mass_spectra,
1020 DataKind data_kind) const
1021{
1022 using QualMassSpectraVectorIterator = QualMassSpectraVector::iterator;
1023
1024 using DoubleNodeVectorMap = std::map<double, NodeVector>;
1025 using MapIterator = DoubleNodeVectorMap::const_iterator;
1026
1027 const DoubleNodeVectorMap *map_p;
1028
1029 if(data_kind == DataKind::rt)
1030 {
1031 map_p = &m_rtDoubleNodeVectorMap;
1032
1033 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
1034 // start
1035 //<< "end:" << end;
1036 }
1037 else if(data_kind == DataKind::dt)
1038 {
1039 map_p = &m_dtDoubleNodeVectorMap;
1040
1041 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
1042 // start
1043 //<< "end:" << end;
1044 }
1045 else
1046 qFatal("Programming error.");
1047
1048 std::size_t removed_vector_items = 0;
1049
1050 // We want to remove from the nodes vector all the nodes that contain a mass
1051 // spectrum acquired at a time range outside of [ start-end ], that is, the
1052 // time values [begin() - start [ and ]end -- end()[.
1053
1054 // Looking for an iterator that points to an item having a time < start.
1055
1056 // lower_bound returns an iterator pointing to the first element in the
1057 // range [first, last) that is not less than (i.e. greater or equal to)
1058 // value, or last if no such element is found.
1059
1060 MapIterator first_end_iterator = (*map_p).lower_bound(start);
1061
1062 // first_end_iterator points to the item that has the next time value with
1063 // respect to start. This is fine because we'll not remove that point
1064 // because the for loop below will stop one item short of
1065 // first_end_iterator. That means that we effectively remove all the items
1066 // [begin() -> start[ (start not include). Exactly what we want.
1067
1068 // qDebug() << "lower_bound for start:" << first_end_iterator->first;
1069
1070 // Now that we have the first_end_iterator, we can iterate between [begin --
1071 // first_end_iterator[
1072
1073 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
1074 ++iterator)
1075 {
1076 // Remove from the nodes vector the nodes.
1077
1078 // We are iterating in MapPair items.
1079
1080 NodeVector node_vector = iterator->second;
1081
1082 // All the nodes in the node vector need to be removed from the
1083 // mass_spectra vector passed as parameter if found.
1084
1085 for(auto &&node_p : node_vector)
1086 {
1087 QualMassSpectraVectorIterator iterator =
1088 std::find(mass_spectra.begin(),
1089 mass_spectra.end(),
1090 node_p->getQualifiedMassSpectrum());
1091
1092 if(iterator != mass_spectra.end())
1093 {
1094 // We found the mass spectrum: remove it.
1095
1096 mass_spectra.erase(iterator);
1097
1098 ++removed_vector_items;
1099 }
1100 }
1101 }
1102
1103 // Now the second begin iterator, so that we can remove all the items
1104 // contained in the second range, that is, ]end--end()[.
1105
1106 // The second_first_iterator will point to the item having its time value
1107 // less or equal to end. But we do not want to get items having their time
1108 // equal to end, only < end. So, if the iterator is not begin(), we just
1109 // need to decrement it once.
1110
1111 MapIterator second_first_iterator = map_p->upper_bound(end);
1112
1113 // second_first_iterator now points to the item after the one having time
1114 // end. Which is exactly what we want: we want to remove ]end--end()[ and
1115 // this is exactly what the loop starting a the point after end below.
1116
1117 // qDebug() << "second_first_iterator for end:" <<
1118 // second_first_iterator->first;
1119
1120 for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
1121 ++iterator)
1122 {
1123 // We are iterating in MapPair items.
1124
1125 NodeVector node_vector = iterator->second;
1126
1127 // All the nodes in the node vector need to be removed from the
1128 // mass_spectra vector passed as parameter if found.
1129
1130 for(auto &&node_p : node_vector)
1131 {
1132 QualMassSpectraVectorIterator iterator =
1133 std::find(mass_spectra.begin(),
1134 mass_spectra.end(),
1135 node_p->getQualifiedMassSpectrum());
1136
1137 if(iterator != mass_spectra.end())
1138 {
1139 // We found the node: remove it.
1140
1141 mass_spectra.erase(iterator);
1142
1143 ++removed_vector_items;
1144 }
1145 }
1146 }
1147
1148 return removed_vector_items;
1149}
1150
1151
1152std::size_t
1154{
1155 // We want to know what is the depth of the tree, that is the highest level
1156 // of MSn, that is, n.
1157
1158 if(!m_rootNodes.size())
1159 return 0;
1160
1161 // qDebug() << "There are" << m_rootNodes.size() << "root nodes";
1162
1163 // By essence, we are at MS0: only if we have at least one root node do we
1164 // know we have MS1 data. So we already know that we have at least one
1165 // child, so start with depth 1.
1166
1167 std::size_t depth = 1;
1168 std::size_t tmp_depth = 0;
1169 std::size_t greatest_depth = 0;
1170
1171 for(auto &node : m_rootNodes)
1172 {
1173 tmp_depth = node->depth(depth);
1174
1175 // qDebug() << "Returned depth:" << tmp_depth;
1176
1177 if(tmp_depth > greatest_depth)
1178 greatest_depth = tmp_depth;
1179 }
1180
1181 return greatest_depth;
1182}
1183
1184
1185std::size_t
1187{
1188
1189 std::size_t cumulative_node_count = 0;
1190
1191 for(auto &node : m_rootNodes)
1192 {
1193 node->size(cumulative_node_count);
1194
1195 // qDebug() << "Returned node_count:" << node_count;
1196 }
1197
1198 return cumulative_node_count;
1199}
1200
1201
1202std::size_t
1204{
1205 return m_indexNodeMap.size();
1206}
1207
1208
1209std::size_t
1214
1215
1216} // namespace pappso
virtual void setNodesToProcessCount(std::size_t)=0
QualifiedMassSpectrumCstSPtr mcsp_massSpectrum
MsRunDataSetTreeNode * findNode(std::size_t spectrum_index)
std::vector< MsRunDataSetTreeNode * > m_children
MsRunDataSetTreeNode * findNode(QualifiedMassSpectrumCstSPtr mass_spectrum_csp) const
const std::vector< MsRunDataSetTreeNode * > & getRootNodes() const
std::vector< QualifiedMassSpectrumCstSPtr > QualMassSpectraVector
std::vector< MsRunDataSetTreeNode * > flattenedViewMsLevel(std::size_t ms_level, bool with_descendants=false)
std::size_t indexNodeMapSize() const
void accept(MsRunDataSetTreeNodeVisitorInterface &visitor)
MsRunDataSetTree(MsRunIdCstSPtr ms_run_id_csp)
bool documentNodeInDtRtMap(double time, MsRunDataSetTreeNode *node_p, DataKind data_kind)
std::vector< MsRunDataSetTreeNode * > flattenedView()
std::size_t getSpectrumCount() const
std::map< std::size_t, MsRunDataSetTreeNode * > m_indexNodeMap
std::vector< MsRunDataSetTreeNode * > m_rootNodes
std::size_t addDataSetTreeNodesInsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
std::map< double, NodeVector > DoubleNodeVectorMap
std::size_t removeDataSetTreeNodesOutsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
std::vector< MsRunDataSetTreeNode * > precursorNodesByPrecursorMz(pappso_double mz, PrecisionPtr precision_ptr)
std::vector< MsRunDataSetTreeNode * > NodeVector
std::size_t addDataSetQualMassSpectraInsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
MsRunDataSetTreeNode * precursorNodeByProductSpectrumIndex(std::size_t product_spectrum_index)
const std::map< std::size_t, MsRunDataSetTreeNode * > & getIndexNodeMap() const
MsRunDataSetTreeNode * addMassSpectrum(QualifiedMassSpectrumCstSPtr mass_spectrum)
std::size_t removeDataSetQualMassSpectraOutsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
std::size_t massSpectrumIndex(const MsRunDataSetTreeNode *node) const
DoubleNodeVectorMap m_rtDoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > productNodesByPrecursorSpectrumIndex(std::size_t precursor_spectrum_index)
DoubleNodeVectorMap m_dtDoubleNodeVectorMap
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
MsDataFormat
Definition types.h:120
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const QualifiedMassSpectrum > QualifiedMassSpectrumCstSPtr
@ IonMobOneOverK0Begin
1/K0 range's begin value
DataKind
Definition types.h:218
@ dt
Drift time.
@ rt
Retention time.