libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spectree.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/spectree/spectree.cpp
3 * \date 11/12/2023
4 * \author Olivier Langella
5 * \brief Matthieu David's SpecTree structure
6 *
7 * C++ implementation of algorithm already described in :
8 * 1. David, M., Fertin, G., Rogniaux, H. & Tessier, D. SpecOMS: A Full Open
9 * Modification Search Method Performing All-to-All Spectra Comparisons within
10 * Minutes. J. Proteome Res. 16, 3030–3038 (2017).
11 *
12 * https://www.theses.fr/2019NANT4092
13 */
14
15
16/*
17 * SpecTree
18 * Copyright (C) 2023 Olivier Langella
19 * <olivier.langella@universite-paris-saclay.fr>
20 *
21 * This program is free software: you can redistribute ipetide to spectrum
22 * alignmentt and/or modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation, either version 3 of the
24 * License, or (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program. If not, see <http://www.gnu.org/licenses/>.
33 *
34 */
35
36#include "spectree.h"
37#include <QDebug>
38#include <QObject>
41
42
43namespace pappso
44{
45namespace spectree
46{
47
48SpecTree::SpecTree(const BucketClustering &bucket_clustering)
49{
50
51
52 std::vector<Bucket> bins = bucket_clustering.asSortedList();
53 SpecTreeNode node;
54 node.value = bins[0].getCartList().at(0);
55 node.count = 1;
56 qDebug() << node.value;
57 // Specific processing for the first bucket : the first node and all its
58 // children are added in SpecTrees and the transversal accessor is updated
59 // accordingly
60
61
62 std::vector<std::size_t> spectrumLastNodeIndex;
63 spectrumLastNodeIndex.resize(bucket_clustering.getItemCartCount(),
65 m_spectrumFirstNodeIndex.resize(bucket_clustering.getItemCartCount(),
67
68 m_spectrumFirstNodeIndex[node.value] = m_nodeList.size() - 1;
69
70
71 m_nodeList.push_back(node);
72 manageSideAccess(spectrumLastNodeIndex);
73 for(std::size_t cpt = 1; cpt < bins[0].size(); ++cpt)
74 {
75 node.parentIndex = m_nodeList.size() - 1;
77 node.value = bins[0].getCartList().at(cpt);
78 node.count = 1;
79 m_nodeList.push_back(node);
80 manageSideAccess(spectrumLastNodeIndex);
81 }
82
83
84 qDebug();
85 // For the rest of the buckets, we increment the node counter in prefix
86 // common parts without creating new nodes. Once out of the prefix common
87 // part, new nodes are created similarily as they were for the first bucket.
88 for(std::size_t bins_index = 1; bins_index < bins.size(); ++bins_index)
89 {
90 qDebug() << "bins_index=" << bins_index;
91 std::size_t pos = 0;
92 bool common = false;
93 std::size_t parentNode = index_not_defined;
94
95 std::size_t previous_bin_size = bins[bins_index - 1].size();
96
97 // detection of a common first element
98 if(pos < previous_bin_size)
99 {
100 if(bins[bins_index].getCartList().at(pos) ==
101 bins[bins_index - 1].getCartList().at(pos))
102 {
103 common = true;
104 }
105 }
106 // insertion procedure for a common prefix part insertion : we go
107 // through existing nodes and increment their counter value by one
108 while(common)
109 {
110 qDebug() << "common pos=" << pos;
111 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
112 qDebug() << "common spectrum_index=" << spectrum_index;
113 std::size_t tempNodeIndex = spectrumLastNodeIndex[spectrum_index];
114 qDebug() << "common tempNodeIndex=" << tempNodeIndex;
115 m_nodeList[tempNodeIndex].count++;
116 // checking on the prefix common part persistence conditions ; if
117 // not valid, we are not in a common prefix part anymore for the
118 // currently inserted bucket
119 if((previous_bin_size - 1 == pos) ||
120 (bins[bins_index].size() - 1 == pos))
121 {
122 qDebug() << "last one";
123 common = false;
124 }
125 else
126 {
127 qDebug() << "bins[bins_index - 1].getSpectrumIndex(pos + 1)="
128 << bins[bins_index - 1].getCartList().at(pos + 1);
129 qDebug() << "bins[bins_index].getSpectrumIndex(pos + 1)="
130 << bins[bins_index].getCartList().at(pos + 1);
131 if(bins[bins_index - 1].getCartList().at(pos + 1) !=
132 bins[bins_index].getCartList().at(pos + 1))
133 {
134 common = false;
135 }
136 }
137 parentNode = tempNodeIndex;
138 ++pos;
139
140 qDebug() << "common=" << common;
141 }
142 qDebug();
143 // insertion procedure for all the remaining nodes not in the prefix
144 // common part : we create new nodes with a counter value of 1 and
145 // update the transversal accessor and last inserted node list
146 while(pos < bins[bins_index].size())
147 {
148 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
149
150 node.parentIndex = parentNode;
152 node.value = spectrum_index;
153 node.count = 1;
154 m_nodeList.push_back(node);
155 manageSideAccess(spectrumLastNodeIndex);
156 parentNode = m_nodeList.size() - 1;
157
158 ++pos;
159 }
160 }
161}
162
166
167void
169{
170 m_nodeList.push_back(node);
171}
172
173void
174SpecTree::manageSideAccess(std::vector<std::size_t> &spectrumLastNodeIndex)
175{
176 auto spectrum_index = m_nodeList.back().value;
177 auto node_position = m_nodeList.size() - 1;
178 if(m_spectrumFirstNodeIndex[spectrum_index] == index_not_defined)
179 {
180 // first occurence of this spectrum
181 m_spectrumFirstNodeIndex[spectrum_index] = node_position;
182 spectrumLastNodeIndex[spectrum_index] = node_position;
183 }
184 else
185 {
186 if(spectrumLastNodeIndex[spectrum_index] == node_position)
187 {
188 }
189 else
190 {
191 m_nodeList[spectrumLastNodeIndex[spectrum_index]].nextIndex =
192 node_position;
193 spectrumLastNodeIndex[spectrum_index] = node_position;
194 }
195 }
196}
197
198QString
200{
201 QString node_str("node|spectrum|parent|next|count|\n");
202 std::size_t i = 0;
203 for(auto node : m_nodeList)
204 {
205 int parent = -1;
206 if(node.parentIndex < index_not_defined)
207 parent = node.parentIndex;
208 int next = -1;
209 if(node.nextIndex < index_not_defined)
210 next = node.nextIndex;
211 node_str.append(QString("node_%1|%2|%3|%4|%5end\n")
212 .arg(i)
213 .arg(node.value)
214 .arg(parent)
215 .arg(next)
216 .arg(node.count));
217
218 i++;
219 }
220 return node_str;
221}
222
223const std::vector<std::size_t> &
228
229void
231 SpecXtractInterface &spec_xtract,
232 std::size_t minimum_count,
233 std::size_t cart_id_range_max,
234 std::size_t cart_id_range_min,
235 std::size_t target_cart_id_max,
236 std::size_t target_cart_id_min) const
237{
238
239 if(cart_id_range_max < cart_id_range_min)
240 {
241 std::swap(cart_id_range_max, cart_id_range_min);
242 }
243
244 if(cart_id_range_max < m_spectrumFirstNodeIndex.size())
245 {
246
247 monitor.setStatus(QObject::tr("starting SpecXtract operation"));
248
249 MapSimilarityCount map_count;
250 map_count.map_id_count.resize(cart_id_range_max);
251
252 // initialisation of the structure to store the compared pairs
253 // int[][] results = new int[m_count][3];
254 // int count = 0;
255
256 // iteration on the transversal accessor starting from the deepest spetrum
257 // and climbing up to limit
258 std::size_t spectra1 = cart_id_range_max;
259 monitor.setTotalSteps(cart_id_range_max - cart_id_range_min);
260 while(true)
261 {
262
263 if(monitor.shouldIstop())
264 {
266 QObject::tr("SpecXtract job stopped"));
267 }
268 qDebug() << "spectra1=" << spectra1;
269 spec_xtract.beginItemCartExtraction(spectra1);
270 monitor.count();
271 // System.out.println(start);
272
273 map_count.keys.clear();
274 map_count.aboveThreshold.clear();
275
277 minimum_count,
278 spectra1,
279 target_cart_id_max,
280 target_cart_id_min);
281
282 qDebug() << "spectra1=" << spectra1
283 << " map_count.aboveThreshold.size()="
284 << map_count.aboveThreshold.size()
285 << " map_count.keys.size()=" << map_count.keys.size();
286
287 for(std::size_t spectra2 : map_count.aboveThreshold)
288 {
289
290 // std::size_t spectra2 = *it;
291 qDebug() << "spectra2=" << spectra2;
292
293 // std::size_t total_extracted_count =
294 // map_count.map_id_count.at(spectra2);
295 spec_xtract.reportSimilarity(
296 spectra1, spectra2, map_count.map_id_count.at(spectra2).count);
297 }
298
299 spec_xtract.endItemCartExtraction(spectra1);
300 // monitor.count();
301 if(spectra1 == cart_id_range_min)
302 break;
303 spectra1--;
304 }
305 }
306 else
307 {
309 QObject::tr(
310 "item cart index %1 out of range (item cart id max size=%2)")
311 .arg(cart_id_range_max)
312 .arg(m_spectrumFirstNodeIndex.size()));
313 }
314 qDebug();
315}
316
317void
319 MapSimilarityCount &map_count,
320 std::size_t minimum_count,
321 std::size_t target_cart_id_max,
322 std::size_t target_cart_id_min) const
323{
324
325 std::size_t parent_id = start_node.parentIndex;
326
327 qDebug() << " start node spectrum=" << start_node.value
328 << " parent_id=" << parent_id << " init_count=" << start_node.count
329 << " minimum_count=" << minimum_count
330 << " target_cart_id_min=" << target_cart_id_min;
331
332 while(parent_id != index_not_defined)
333 {
334 // next_node :
335 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
336
337 if(current_node.value < target_cart_id_min)
338 break;
339 if(current_node.value <= target_cart_id_max)
340 {
341 auto &map_id_count = map_count.map_id_count[current_node.value];
342 if(map_id_count.lastWitness == start_node.value)
343 {
344 map_id_count.count += start_node.count;
345 // if at one time, a similarity exceeds the threshold value for
346 // the first time, it is written into the proper stack
347 if((map_id_count.count >= minimum_count) &&
348 (map_id_count.aboveThreshold == false))
349 {
350 map_id_count.aboveThreshold = true;
351 map_count.aboveThreshold.push_back(current_node.value);
352 }
353 }
354 else
355 {
356 map_id_count.lastWitness = start_node.value;
357 map_id_count.count = start_node.count;
358 map_id_count.aboveThreshold = false;
359 // if at one time, a similarity exceeds the threshold value for
360 // the first time, it is written into the proper stack
361 if(map_id_count.count >= minimum_count)
362 {
363 map_id_count.aboveThreshold = true;
364 map_count.aboveThreshold.push_back(current_node.value);
365 }
366 map_count.keys.push_back(current_node.value);
367 }
368 }
369 parent_id = current_node.parentIndex;
370 }
371}
372
373std::size_t
375 const SpecTree::SpecTreeNode &start_node,
376 std::size_t spectrum_index_target) const
377{
378 std::size_t parent_id = start_node.parentIndex;
379
380 while(parent_id != index_not_defined)
381 {
382 // next_node :
383 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
384
385 if(current_node.value < spectrum_index_target)
386 return 0;
387 if(current_node.value == spectrum_index_target)
388 return start_node.count;
389
390 parent_id = current_node.parentIndex;
391 }
392 return 0;
393}
394
395
396/**
397 * Extraction of the similarities above a given threshold between a given
398 * spectrum and all other spectra higher located in SpecTrees.
399 * @param tree_indices SpecTrees the data has to be extracted from
400 * @param spectrum_index The spectrum with ated in SpecTrees.
401 * @param tree_indices SpecTrees the data has to be extracted from
402 * @param spectrum_index The spectrum with which we want to extract similarities
403 * @param minimum_count The threshold to use for the extraction
404 * @since 0.1
405 */
406void
408 std::size_t minimum_count,
409 std::size_t spectrum_index,
410 std::size_t target_cart_id_max,
411 std::size_t target_cart_id_min) const
412{
413
414 qDebug() << " spectrum_index=" << spectrum_index;
415 // sOne;
416 // we position the current node at the first occurence of the spectrum in
417 // the transversal accessor and initialise variablethresholds accordingly
418 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_index];
419 while(node_index != index_not_defined)
420 {
421 qDebug() << "node_index=" << node_index;
422 const SpecTreeNode &current_node = m_nodeList[node_index];
423 // go back in the tree branch from the current_node
424 walkBackInBranchFromNode(current_node,
425 map_count,
426 minimum_count,
427 target_cart_id_max,
428 target_cart_id_min);
429 node_index = current_node.nextIndex;
430 }
431 qDebug();
432}
433
434std::size_t
436 std::size_t spectrum_b_index) const
437{
438 if(spectrum_a_index > spectrum_b_index)
439 std::swap(spectrum_a_index, spectrum_b_index);
440
441 std::size_t count_target = 0;
442 if(spectrum_b_index < m_spectrumFirstNodeIndex.size())
443 {
444 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_b_index];
445 while(node_index != index_not_defined)
446 {
447 qDebug() << "node_index=" << node_index;
448 const SpecTreeNode &current_node = m_nodeList[node_index];
449 // go back in the tree branch from the current_node
450 count_target +=
451 walkBackInBranchFromNodeToTarget(current_node, spectrum_a_index);
452 node_index = current_node.nextIndex;
453 }
454 }
455 else
456 {
458 QObject::tr(
459 "spectrum_index %1 out of range (spectrum_index max size=%2)")
460 .arg(spectrum_b_index)
461 .arg(m_spectrumFirstNodeIndex.size()));
462 }
463 return count_target;
464}
465} // namespace spectree
466} // namespace pappso
virtual void setStatus(const QString &status)=0
current status of the process
virtual void setTotalSteps(std::size_t total_number_of_steps)
use it if the number of steps is known in an algorithm the total number of steps is usefull to report...
virtual bool shouldIstop()=0
should the procces be stopped ? If true, then cancel process Use this function at strategic point of ...
virtual void count()=0
count steps report when a step is computed in an algorithm
std::vector< Bucket > asSortedList() const
std::vector< SpecTreeNode > m_nodeList
Definition spectree.h:172
std::size_t walkBackInBranchFromNodeToTarget(const SpecTree::SpecTreeNode &start_node, std::size_t spectrum_index_target) const
Definition spectree.cpp:374
const std::vector< std::size_t > & getSpectrumFirstNodeIndex() const
get the adress map of sepctrum index and their first node index
Definition spectree.cpp:224
static constexpr std::size_t index_not_defined
Definition spectree.h:118
void walkBackInBranchFromNode(const SpecTree::SpecTreeNode &start_node, MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
Definition spectree.cpp:318
std::vector< std::size_t > m_spectrumFirstNodeIndex
Definition spectree.h:173
QString toString() const
Definition spectree.cpp:199
std::size_t extractSpectrumPairSimilarityCount(std::size_t spectrum_a_index, std::size_t spectrum_b_index) const
get the number of common component for a pair of spectrum
Definition spectree.cpp:435
void manageSideAccess(std::vector< std::size_t > &spectrumLastNodeIndex)
Definition spectree.cpp:174
void addNewNode(const SpecTreeNode &node)
Definition spectree.cpp:168
void extractSpectrumSimilarityCount(MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t spectrum_index, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
get a map of similarities for a given spectrum index
Definition spectree.cpp:407
SpecTree(const BucketClustering &bucket_clustering)
Definition spectree.cpp:48
void xtract(UiMonitorInterface &monitor, SpecXtractInterface &spec_xtract, std::size_t minimum_count, std::size_t cart_id_range_max, std::size_t cart_id_range_min, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
Definition spectree.cpp:230
yield similarities between pairs of ItemCart
virtual void beginItemCartExtraction(std::size_t cart_id_a)
virtual void reportSimilarity(std::size_t cart_id_a, std::size_t cart_id_b, std::size_t similarity)=0
virtual void endItemCartExtraction(std::size_t cart_id_a)
process interrupted exception
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
Matthieu David's SpecTree structure.
std::vector< MapSimilarityCountElement > map_id_count
Definition spectree.h:138
std::vector< std::size_t > aboveThreshold
Definition spectree.h:137