
regression.hxx
/************************************************************************/
/*                                                                      */
/*               Copyright 2008-2013 by Ullrich Koethe                  */
/*                                                                      */
/*    This file is part of the VIGRA computer vision library.           */
/*    The VIGRA Website is                                              */
/*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
/*    Please direct questions, bug reports, and contributions to        */
/*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
/*        vigra@informatik.uni-hamburg.de                               */
/*                                                                      */
/*    Permission is hereby granted, free of charge, to any person       */
/*    obtaining a copy of this software and associated documentation    */
/*    files (the "Software"), to deal in the Software without           */
/*    restriction, including without limitation the rights to use,      */
/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
/*    sell copies of the Software, and to permit persons to whom the    */
/*    Software is furnished to do so, subject to the following          */
/*    conditions:                                                       */
/*                                                                      */
/*    The above copyright notice and this permission notice shall be    */
/*    included in all copies or substantial portions of the             */
/*    Software.                                                         */
/*                                                                      */
/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
/*                                                                      */
/************************************************************************/


#ifndef VIGRA_REGRESSION_HXX
#define VIGRA_REGRESSION_HXX

#include "matrix.hxx"
#include "linear_solve.hxx"
#include "singular_value_decomposition.hxx"
#include "numerictraits.hxx"
#include "functorexpression.hxx"
#include "autodiff.hxx"


namespace vigra
{

namespace linalg
{

/** \addtogroup Optimization Optimization and Regression
 */
//@{
   /** Ordinary Least Squares Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        and a column vector \a b of length <tt>m</tt>, this function computes
        the column vector \a x of length <tt>n</tt> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
        \f]

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        This function is just another name for \ref linearSolve(), perhaps
        leading to more readable code when \a A is a rectangular matrix. It returns
        <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
        See \ref linearSolve() for more documentation.

        <b>\#include</b> <vigra/regression.hxx><br/>
        Namespaces: vigra and vigra::linalg
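
        A minimal usage sketch (assuming \a A and \a b have already been filled
        with data, and all shapes are set up as described above):
        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b

        bool ok = leastSquares(A, b, x);   // same as linearSolve(A, b, x, "QR")
        \endcode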
    */
template <class T, class C1, class C2, class C3>
inline bool
leastSquares(MultiArrayView<2, T, C1> const & A,
             MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x,
             std::string method = "QR")
{
    return linearSolve(A, b, x, method);
}

   /** Weighted Least Squares Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
        with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
        that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
            \textrm{diag}(\textrm{\bf weights})
            \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
        \f]

        where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
        The algorithm calls \ref leastSquares() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
            \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
        \f]

        where the square root of \a weights is just taken element-wise.

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        The function returns
        <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.

        <b>\#include</b> <vigra/regression.hxx><br/>
        Namespaces: vigra and vigra::linalg
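
        A minimal usage sketch (assuming the data and the non-negative weights
        have already been filled in):
        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), weights(m, 1), x(n, 1);
        ... // fill A, b, and weights

        bool ok = weightedLeastSquares(A, b, weights, x);
        \endcode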
    */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
                     MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> const & weights,
                     MultiArrayView<2, T, C4> & x, std::string method = "QR")
{
    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
        "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
        "weightedLeastSquares(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
        "weightedLeastSquares(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
        "weightedLeastSquares(): Result matrix x has wrong shape.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
            "weightedLeastSquares(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return leastSquares(wa, wb, x, method);
}

   /** Ridge Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
        this function computes the vector \a x of length <tt>n</tt>
        that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
            \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

        This is implemented by means of \ref singularValueDecomposition().

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
        and <tt>lambda == 0.0</tt>.

        <b>\#include</b> <vigra/regression.hxx><br/>
        Namespaces: vigra and vigra::linalg
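
        A minimal usage sketch (the value of lambda is arbitrary and only
        meant to illustrate the call):
        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b

        bool ok = ridgeRegression(A, b, x, 0.5);   // lambda = 0.5
        \endcode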
    */
template <class T, class C1, class C2, class C3>
bool
ridgeRegression(MultiArrayView<2, T, C1> const & A,
                MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x, double lambda)
{
    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
        "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
        "ridgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
        "ridgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
        "ridgeRegression(): lambda >= 0.0 required.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);
    if(rank < n && lambda == 0.0)
        return false;

    Matrix<T> t = transpose(u)*b;
    for(unsigned int k=0; k<cols; ++k)
        for(unsigned int l=0; l<rhsCount; ++l)
            t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
    x = v*t;
    return true;
}

   /** Weighted Ridge Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
        with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>,
        this function computes the vector \a x of length <tt>n</tt>
        that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
            \textrm{diag}(\textrm{\bf weights})
            \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
            \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

        where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
        The algorithm calls \ref ridgeRegression() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
            \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
            \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

        where the square root of \a weights is just taken element-wise. This solution is
        computed by means of \ref singularValueDecomposition().

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
        and <tt>lambda == 0.0</tt>.

        <b>\#include</b> <vigra/regression.hxx><br/>
        Namespaces: vigra and vigra::linalg
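
        A minimal usage sketch (weights and lambda are arbitrary examples):
        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), weights(m, 1), x(n, 1);
        ... // fill A, b, and the non-negative weights

        bool ok = weightedRidgeRegression(A, b, weights, x, 0.5);
        \endcode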
    */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
                        MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> const & weights,
                        MultiArrayView<2, T, C4> & x, double lambda)
{
    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
        "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
        "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
        "weightedRidgeRegression(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
        "weightedRidgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
        "weightedRidgeRegression(): lambda >= 0.0 required.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
            "weightedRidgeRegression(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return ridgeRegression(wa, wb, x, lambda);
}

   /** Ridge Regression with many lambdas.

        This executes \ref ridgeRegression() for a sequence of regularization parameters. This
        is implemented so that the \ref singularValueDecomposition() has to be executed only once.
        \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. must
        support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
        will contain the solutions for the corresponding lambda, so the number of columns of
        the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
        i.e. cannot contain several right hand sides at once.

        The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
        happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.

        <b>\#include</b> <vigra/regression.hxx><br/>
        Namespaces: vigra and vigra::linalg
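
        A minimal usage sketch (the lambda sequence is an arbitrary example):
        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1);
        ... // fill A and b

        std::vector<double> lambdas(3);
        lambdas[0] = 0.01; lambdas[1] = 0.1; lambdas[2] = 1.0;
        Matrix<double> x(n, lambdas.size());   // one column per lambda
        bool fullRank = ridgeRegressionSeries(A, b, x, lambdas);
        \endcode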
    */
template <class T, class C1, class C2, class C3, class Array>
bool
ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
                      MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x, Array const & lambda)
{
    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int lambdaCount = lambda.size();
    vigra_precondition(rows >= cols,
        "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
        "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
        "ridgeRegressionSeries(): Result matrix x has wrong shape.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);

    Matrix<T> xl = transpose(u)*b;
    Matrix<T> xt(cols,1);
    for(unsigned int i=0; i<lambdaCount; ++i)
    {
        vigra_precondition(lambda[i] >= 0.0,
            "ridgeRegressionSeries(): lambda >= 0.0 required.");
        if(lambda[i] == 0.0 && rank < cols)  // A is rank deficient => skip this lambda
            continue;
        for(unsigned int k=0; k<cols; ++k)
            xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
        columnVector(x, i) = v*xt;
    }
    return (rank == n);
}

/** \brief Pass options to leastAngleRegression().

    <b>\#include</b> <vigra/regression.hxx><br/>
    Namespaces: vigra and vigra::linalg
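
    Options are set by chaining the member functions, for example (a
    hypothetical combination, just to illustrate the syntax):
    \code
    LeastAngleRegressionOptions().nnlasso().maxSolutionCount(10)
    \endcode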
*/
class LeastAngleRegressionOptions
{
  public:
    enum Mode { LARS, LASSO, NNLASSO };

        /** Initialize all options with default values.
        */
    LeastAngleRegressionOptions()
    : max_solution_count(0),
      unconstrained_dimension_count(0),
      mode(LASSO),
      least_squares_solutions(true)
    {}

        /** Maximum number of solutions to be computed.

            If \a n is 0 (the default), the number of solutions is determined by the length
            of the solution array. Otherwise, the minimum of maxSolutionCount() and that
            length is taken.<br>
            Default: 0 (use length of solution array)
        */
    LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
    {
        max_solution_count = static_cast<int>(n);
        return *this;
    }

        /** Set the mode of the algorithm.

            Mode must be one of "lars", "lasso", "nnlasso". The function just calls
            the member function of the corresponding name to set the mode.

            Default: "lasso"
        */
    LeastAngleRegressionOptions & setMode(std::string mode)
    {
        mode = tolower(mode);
        if(mode == "lars")
            this->lars();
        else if(mode == "lasso")
            this->lasso();
        else if(mode == "nnlasso")
            this->nnlasso();
        else
            vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
        return *this;
    }


        /** Use the plain LARS algorithm.

            Default: inactive
        */
    LeastAngleRegressionOptions & lars()
    {
        mode = LARS;
        return *this;
    }

        /** Use the LASSO modification of the LARS algorithm.

            This allows features to be removed from the active set under certain conditions.<br>
            Default: active
        */
    LeastAngleRegressionOptions & lasso()
    {
        mode = LASSO;
        return *this;
    }

        /** Use the non-negative LASSO modification of the LARS algorithm.

            This forces all non-zero entries in the solution to be positive.<br>
            Default: inactive
        */
    LeastAngleRegressionOptions & nnlasso()
    {
        mode = NNLASSO;
        return *this;
    }

        /** Compute least squares solutions.

            Use least angle regression to determine active sets, but
            return least squares solutions for the features in each active set,
            instead of constrained solutions.<br>
            Default: <tt>true</tt>
        */
    LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
    {
        least_squares_solutions = select;
        return *this;
    }

    int max_solution_count, unconstrained_dimension_count;
    Mode mode;
    bool least_squares_solutions;
};

namespace detail {

template <class T, class C1, class C2>
struct LarsData
{
    typedef typename MultiArrayShape<2>::type Shape;

    int activeSetSize;
    MultiArrayView<2, T, C1> A;
    MultiArrayView<2, T, C2> b;
    Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
    ArrayVector<MultiArrayIndex> columnPermutation;

    // init data for a new run
    LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
    : activeSetSize(1),
      A(Ai), b(bi), R(A), qtb(b),
      lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
      next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }

    // copy data for the recursive call in nnlassolsq
    LarsData(LarsData const & d, int asetSize)
    : activeSetSize(asetSize),
      A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
      lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
      next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
      next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }
};

template <class T, class C1, class C2, class Array1, class Array2, class Array3>
unsigned int
leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
                             Array1 & activeSets,
                             Array2 * lars_solutions, Array3 * lsq_solutions,
                             LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    typedef typename MultiArrayShape<2>::type Shape;
    typedef typename Matrix<T>::view_type Subarray;
    typedef ArrayVector<MultiArrayIndex> Permutation;
    typedef typename Permutation::view_type ColumnSet;

    vigra_precondition(d.activeSetSize > 0,
        "leastAngleRegressionMainLoop() must not be called with empty active set.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
    bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);

    const MultiArrayIndex rows = rowCount(d.R);
    const MultiArrayIndex cols = columnCount(d.R);
    const MultiArrayIndex maxRank = std::min(rows, cols);

    MultiArrayIndex maxSolutionCount = options.max_solution_count;
    if(maxSolutionCount == 0)
        maxSolutionCount = lasso_modification
                                ? 10*maxRank
                                : maxRank;

    bool needToRemoveColumn = false;
    MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
    MultiArrayIndex currentSolutionCount = 0;
    while(currentSolutionCount < maxSolutionCount)
    {
        //ColumnSet activeSet = d.columnPermutation.subarray(0, static_cast<unsigned int>(d.activeSetSize));
        ColumnSet inactiveSet = d.columnPermutation.subarray(static_cast<unsigned int>(d.activeSetSize), static_cast<unsigned int>(cols));

        // find next dimension to be activated
        Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction),      // correlation with LARS residual
                  cLSQ  = transpose(d.A) * (d.b - d.next_lsq_prediction);  // correlation with LSQ residual

        // In theory, all vectors in the active set should have the same correlation C, and
        // the correlation of all others should not exceed this. In practice, we may find the
        // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
        // determine C from the entire set of variables.
        MultiArrayIndex cmaxIndex = enforce_positive
                                       ? argMax(cLARS)
                                       : argMax(abs(cLARS));
        T C = abs(cLARS(cmaxIndex, 0));

        Matrix<T> ac(cols - d.activeSetSize, 1);
        for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
        {
            T rho = cLSQ(inactiveSet[k], 0),
              cc  = C - sign(rho)*cLARS(inactiveSet[k], 0);

            if(rho == 0.0)              // make sure that 0/0 cannot happen in the other cases
                ac(k,0) = 1.0;          // variable k is linearly dependent on the active set
            else if(rho > 0.0)
                ac(k,0) = cc / (cc + rho);  // variable k would enter the active set with positive sign
            else if(enforce_positive)
                ac(k,0) = 1.0;          // variable k cannot enter the active set because it would be negative
            else
                ac(k,0) = cc / (cc - rho);  // variable k would enter the active set with negative sign
        }

        // in the non-negative case: make sure that a column just removed cannot re-enter right away
        // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
        if(enforce_positive && needToRemoveColumn)
            ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;

        // find candidate
        // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
        //       join the active set simultaneously, so that gamma = 0 cannot occur.
        columnToBeAdded = argMin(ac);

        // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
        T gamma = (d.activeSetSize == maxRank)
                      ? 1.0
                      : ac(columnToBeAdded, 0);

        // adjust columnToBeAdded: we skipped the active set
        if(columnToBeAdded >= 0)
            columnToBeAdded += d.activeSetSize;

        // check whether we have to remove a column from the active set
        needToRemoveColumn = false;
        if(lasso_modification)
        {
            // find dimensions whose weight changes sign below gamma*searchDirection
            Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
            for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            {
                if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
                   (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
                    s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
            }

            columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
            if(columnToBeRemoved >= 0)
            {
                needToRemoveColumn = true;  // remove takes precedence over add
                gamma = s(columnToBeRemoved, 0);
            }
        }

        // compute the current solutions
        d.lars_prediction = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
        d.lars_solution   = gamma * d.next_lsq_solution   + (1.0 - gamma) * d.lars_solution;
        if(needToRemoveColumn)
            d.lars_solution(columnToBeRemoved, 0) = 0.0;  // turn possible epsilon into an exact zero

        // write the current solution
        ++currentSolutionCount;
        activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));

        if(lsq_solutions != 0)
        {
            if(enforce_positive)
            {
                ArrayVector<Matrix<T> > nnresults;
                ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
                LarsData<T, C1, C2> nnd(d, d.activeSetSize);

                leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, static_cast<Array3*>(0),
                                             LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
                //Matrix<T> nnlsq_solution(d.activeSetSize, 1);
                typename Array2::value_type nnlsq_solution(Shape(d.activeSetSize, 1));
                for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
                {
                    nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
                }
                //lsq_solutions->push_back(nnlsq_solution);
                lsq_solutions->push_back(typename Array3::value_type());
                lsq_solutions->back() = nnlsq_solution;
            }
            else
            {
                //lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
                lsq_solutions->push_back(typename Array3::value_type());
                lsq_solutions->back() = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
            }
        }
        if(lars_solutions != 0)
        {
            //lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
            lars_solutions->push_back(typename Array2::value_type());
            lars_solutions->back() = d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        }

        // no further solutions possible
        if(gamma == 1.0)
            break;

        if(needToRemoveColumn)
        {
            --d.activeSetSize;
            if(columnToBeRemoved != d.activeSetSize)
            {
                // remove column 'columnToBeRemoved' and restore triangular form of R
                // note: columnPermutation is automatically swapped here
                detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);

                // swap solution entries
                std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
                std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
                columnToBeRemoved = d.activeSetSize;  // keep track of removed column
            }
            d.lars_solution(d.activeSetSize,0) = 0.0;
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
        }
        else
        {
            vigra_invariant(columnToBeAdded >= 0,
                "leastAngleRegression(): internal error (columnToBeAdded < 0)");
            // add column 'columnToBeAdded'
            if(d.activeSetSize != columnToBeAdded)
            {
                std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
                columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
                columnToBeAdded = d.activeSetSize;  // keep track of added column
            }

            // zero the corresponding entries of the solutions
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
            d.lars_solution(d.activeSetSize,0) = 0.0;

            // reduce R (i.e. its newly added column) to triangular form
            detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
            ++d.activeSetSize;
        }

        // compute the LSQ solution of the new active set
        Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
        Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);

        // compute the LSQ prediction of the new active set
        d.next_lsq_prediction.init(0.0);
        for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
    }

    return static_cast<unsigned int>(currentSolutionCount);
}

template <class T, class C1, class C2, class Array1, class Array2>
unsigned int
leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                         Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
                         LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    const MultiArrayIndex rows = rowCount(A);

    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
        "leastAngleRegression(): Shape mismatch between matrices A and b.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);

    detail::LarsData<T, C1, C2> d(A, b);

    // find dimension with largest correlation
    Matrix<T> c = transpose(A)*b;
    MultiArrayIndex initialColumn = enforce_positive
                                       ? argMaxIf(c, Arg1() > Param(0.0))
                                       : argMax(abs(c));
    if(initialColumn == -1)
        return 0; // no solution found

    // prepare initial active set and search direction etc.
    std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
    columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
    detail::qrColumnHouseholderStep(0, d.R, d.qtb);
    d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
    d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
    d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);

    return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
}

} // namespace detail

/** Least Angle Regression.

    <b>\#include</b> <vigra/regression.hxx><br/>
    Namespaces: vigra and vigra::linalg

    <b> Declarations:</b>

    \code
    namespace vigra {
        namespace linalg {
            // compute either LASSO or least squares solutions
            template <class T, class C1, class C2, class Array1, class Array2>
            unsigned int
            leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                                 Array1 & activeSets, Array2 & solutions,
                                 LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());

            // compute LASSO and least squares solutions
            template <class T, class C1, class C2, class Array1, class Array2>
            unsigned int
            leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                                 Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                                 LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
        }
        using linalg::leastAngleRegression;
    }
    \endcode

    This function implements Least Angle Regression (LARS) as described in

    &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
    B. Efron, T. Hastie, I. Johnstone, and R. Tibshirani: <i>"Least Angle Regression"</i>,
    Annals of Statistics 32(2):407-499, 2004.

    It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem

    \f[ \tilde \textrm{\bf x} = \textrm{argmin}
        \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
        \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
    \f]

    and the L1-regularized non-negative least squares (NN-LASSO) problem

    \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
        \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
    \f]

    where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
    \a b a vector of length <tt>m</tt>, and a regularization parameter <tt>s >= 0.0</tt>.
    L1-regularization has the desirable effect that it causes the solution <b>x</b> to be sparse, i.e. only
    the most important elements in <b>x</b> (called the <em>active set</em>) have non-zero values. The
    key insight of the LARS algorithm is the following: When the solution vector <b>x</b> is considered
    as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
    linear function, i.e. a polyline in n-dimensional space. The knots of the polyline <b>x</b>(s)
    are located precisely at those values of s where one variable enters or leaves the active set,
    and can be efficiently computed.

    Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
    at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
    \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
    the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
    solution with one variable in the active set. The function leastAngleRegression() returns the number
    of solutions (i.e. knot points) computed.

    The sequences of active sets and corresponding variable weights are returned in \a activeSets and
    \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
    containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
    \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
    example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.

    The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
    "LeastAngleRegressionOptions":
    <DL>
    <DT><b>options.lasso()</b> (active by default)
    <DD> Compute the LASSO solution as described above.
    <DT><b>options.nnlasso()</b> (inactive by default)
    <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
         <b>x</b> >= 0 in all solutions.
    <DT><b>options.lars()</b> (inactive by default)
    <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
         a variable from the active set once it entered.
    <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
    <DD> Use the algorithm mode selected above
         to determine the sequence of active sets, but then compute and return an
         ordinary (unconstrained) least squares solution for every active set.<br>
         <b>Note:</b> The second form of leastAngleRegression() ignores this option and
         always computes both the constrained and the unconstrained solutions (returned in
         \a lasso_solutions and \a lsq_solutions respectively).
    <DT><b>options.maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
    <DD> Compute at most <tt>n</tt> solutions.
    </DL>

    <b>Usage:</b>

    \code
    int m = ..., n = ...;
    Matrix<double> A(m, n), b(m, 1);
    ... // fill A and b

    // normalize the input
    Matrix<double> offset(1,n), scaling(1,n);
    prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
    prepareColumns(b, b, DataPreparationGoals(ZeroMean));

    // arrays to hold the output
    ArrayVector<ArrayVector<int> > activeSets;
    ArrayVector<Matrix<double> > solutions;

    // run leastAngleRegression() in non-negative LASSO mode
    int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
                                            LeastAngleRegressionOptions().nnlasso());

    // print results
    Matrix<double> denseSolution(1, n);
    for (MultiArrayIndex k = 0; k < numSolutions; ++k)
    {
        // transform the sparse solution into a dense vector
        denseSolution.init(0.0); // ensure that inactive variables are zero
        for (unsigned int i = 0; i < activeSets[k].size(); ++i)
        {
            // set the values of the active variables;
            // activeSets[k][i] is the true index of the i-th variable in the active set
            denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
        }

        // invert the input normalization
        denseSolution = denseSolution * pointWise(scaling);

        // output the solution
        std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
    }
    \endcode

    <b>Required Interface:</b>

    <ul>
    <li> <tt>T</tt> must be a numeric type (compatible to double)
    <li> <tt>Array1 a1;</tt><br>
         <tt>a1.push_back(ArrayVector<int>());</tt>
    <li> <tt>Array2 a2;</tt><br>
         <tt>a2.push_back(Matrix<T>());</tt>
    </ul>
*/
doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                     Array1 & activeSets, Array2 & solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    if(options.least_squares_solutions)
        return detail::leastAngleRegressionImpl(A, b, activeSets, static_cast<Array2*>(0), &solutions, options);
    else
        return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, static_cast<Array2*>(0), options);
}

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                     Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
}

   /** Non-negative Least Squares Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        and a column vector \a b of length <tt>m</tt>, this function computes
        a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
             \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
        \f]

        Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
        Note that all matrices must already have the correct shape. The solution is computed by means
        of \ref leastAngleRegression() with non-negativity constraint.

        <b>\#include</b> <vigra/regression.hxx><br/>
        Namespaces: vigra and vigra::linalg

        <b> Declarations:</b>

        \code
        namespace vigra {
            namespace linalg {
                template <class T, class C1, class C2, class C3>
                void
                nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
                                        MultiArrayView<2, T, C2> const &b,
                                        MultiArrayView<2, T, C3> &x);
            }
            using linalg::nonnegativeLeastSquares;
        }
        \endcode
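
        <b>Usage:</b>

        A minimal sketch (assuming \a A and \a b have already been filled in):
        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b

        nonnegativeLeastSquares(A, b, x);   // all entries of x will be >= 0
        \endcode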
    */
doxygen_overloaded_function(template <...> void nonnegativeLeastSquares)

template <class T, class C1, class C2, class C3>
inline void
nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
                        MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x)
{
    vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
        "nonnegativeLeastSquares(): Matrix shape mismatch.");
    vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
        "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");

    ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
    ArrayVector<Matrix<T> > results;

    leastAngleRegression(A, b, activeSets, results,
                         LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
    x.init(NumericTraits<T>::zero());
    if(activeSets.size() > 0)
        for(unsigned int k=0; k<activeSets.back().size(); ++k)
            x(activeSets.back()[k],0) = results.back()[k];
}



//@}

} // namespace linalg

using linalg::leastSquares;
using linalg::weightedLeastSquares;
using linalg::ridgeRegression;
using linalg::weightedRidgeRegression;
using linalg::ridgeRegressionSeries;
using linalg::nonnegativeLeastSquares;
using linalg::leastAngleRegression;

namespace detail {

template <class T, class S>
inline T
getRow(MultiArrayView<1, T, S> const & a, MultiArrayIndex i)
{
    return a(i);
}

template <class T, class S>
inline MultiArrayView<1, T, StridedArrayTag>
getRow(MultiArrayView<2, T, S> const & a, MultiArrayIndex i)
{
    return a.bindInner(i);
}

} // namespace detail

/** \addtogroup Optimization
 */
//@{

/** \brief Pass options to nonlinearLeastSquares().

    <b>\#include</b> <vigra/regression.hxx><br/>
    Namespace: vigra
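
    Options are set by chaining the member functions, for example (the
    values are only meant to illustrate the syntax):
    \code
    NonlinearLSQOptions().maxIterations(100).tolerance(1e-10)
    \endcode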
*/
class NonlinearLSQOptions
{
  public:

    double epsilon, lambda, tau;
    int max_iter;

        /** \brief Initialize options with default values.
        */
    NonlinearLSQOptions()
    : epsilon(0.0),
      lambda(0.1),
      tau(1.4),
      max_iter(50)
    {}

        /** \brief Set minimum relative improvement in residual.

            The algorithm stops when the relative improvement in residuals
            between consecutive iterations is less than this value.

            Default: 0 (i.e. choose tolerance automatically, will be 10*epsilon of the numeric type)
        */
    NonlinearLSQOptions & tolerance(double eps)
    {
        epsilon = eps;
        return *this;
    }

        /** \brief Set maximum number of iterations.

            Default: 50
        */
    NonlinearLSQOptions & maxIterations(int iter)
    {
        max_iter = iter;
        return *this;
    }

        /** \brief Set damping parameters for the Levenberg-Marquardt algorithm.

            \a lambda determines by how much the diagonal is emphasized, and \a v is
            the factor by which lambda will be increased if more damping is needed
            for convergence
            (see <a href="http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm">Wikipedia</a>
            for more explanations).

            Default: lambda = 0.1, v = 1.4
        */
    NonlinearLSQOptions & dampingParamters(double lambda, double v)
    {
        vigra_precondition(lambda > 0.0 && v > 0.0,
            "NonlinearLSQOptions::dampingParamters(): parameters must be positive.");
        this->lambda = lambda;
        tau = v;
        return *this;
    }
};

template <unsigned int D, class T, class S1, class S2,
          class U, int N,
          class Functor>
T
nonlinearLeastSquaresImpl(MultiArrayView<D, T, S1> const & features,
                          MultiArrayView<1, T, S2> const & response,
                          TinyVector<U, N> & p,
                          Functor model,
                          NonlinearLSQOptions const & options)
{
    vigra_precondition(features.shape(0) == response.shape(0),
        "nonlinearLeastSquares(): shape mismatch between features and response.");

    double t = options.tau, l = options.lambda;  // initial damping parameters

    double epsilonT = NumericTraits<T>::epsilon()*10.0,
           epsilonU = NumericTraits<U>::epsilon()*10.0,
           epsilon = options.epsilon <= 0.0
                         ? std::max(epsilonT, epsilonU)
                         : options.epsilon;

    linalg::Matrix<T> jj(N,N);  // outer product of the Jacobian
    TinyVector<U, N> jr, dp;

    T residual = 0.0;
    bool didStep = true;

    for(int iter=0; iter<options.max_iter; ++iter)
    {
        if(didStep)
        {
            // update the residual and Jacobian
            residual = 0.0;
            jr = 0.0;
            jj = 0.0;

            for(int i=0; i<features.shape(0); ++i)
            {
                autodiff::DualVector<U, N> res = model(detail::getRow(features, i), autodiff::dualMatrix(p));

                T r = response(i) - res.v;
                jr += r * res.d;
                jj += outer(res.d);
                residual += sq(r);
            }
        }

        // perform a damped gradient step
        linalg::Matrix<T> djj(jj);
        djj.diagonal() *= 1.0 + l;
        linearSolve(djj, jr, dp);

        TinyVector<U, N> p_new = p + dp;

        // compute the new residual
        T residual_new = 0.0;
        for(int i=0; i<features.shape(0); ++i)
        {
            residual_new += sq(response(i) - model(detail::getRow(features, i), p_new));
        }

        if(residual_new < residual)
        {
            // accept the step
            p = p_new;
            if(std::abs((residual - residual_new) / residual) < epsilon)
                return residual_new;
            // try less damping in the next iteration
            l /= t;
            didStep = true;
        }
        else
        {
            // reject the step and use more damping in the next iteration
            l *= t;
            didStep = false;
        }
    }

    return residual;
}

/********************************************************/
/*                                                      */
/*                nonlinearLeastSquares                 */
/*                                                      */
/********************************************************/

/** \brief Fit a non-linear model to given data by minimizing least squares loss.

    <b> Declarations:</b>

    \code
    namespace vigra {
        // variant 1: optimize a univariate model ('x' is a 1D array of scalar data points)
        template <class T, class S1, class S2,
                  class U, int N,
                  class Functor>
        T
        nonlinearLeastSquares(MultiArrayView<1, T, S1> const & x,
                              MultiArrayView<1, T, S2> const & y,
                              TinyVector<U, N> & model_parameters,
                              Functor model,
                              NonlinearLSQOptions const & options = NonlinearLSQOptions());

        // variant 2: optimize a multivariate model ('x' is a 2D array of vector-valued data points)
        template <class T, class S1, class S2,
                  class U, int N,
                  class Functor>
        T
        nonlinearLeastSquares(MultiArrayView<2, T, S1> const & x,
                              MultiArrayView<1, T, S2> const & y,
                              TinyVector<U, N> & model_parameters,
                              Functor model,
                              NonlinearLSQOptions const & options = NonlinearLSQOptions());
    }
    \endcode

    This function implements the
    <a href="http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm">Levenberg-Marquardt algorithm</a>
    to fit a non-linear model to given data. The model depends on a vector of
    parameters <b>p</b> which are to be chosen such that the least-squares residual
    between the data and the model's predictions is minimized according to the objective function:

    \f[ \tilde \textrm{\bf p} = \textrm{argmin}_{\textrm{\bf p}} \sum_i \left( y_i - f(\textrm{\bf x}_i; \textrm{\bf p}) \right)^2
    \f]

    where \f$f(\textrm{\bf x}; \textrm{\bf p})\f$ is the model to be optimized
    (with arguments \f$\textrm{\bf x}\f$ and parameters \f$\textrm{\bf p}\f$), and
    \f$(\textrm{\bf x}_i; y_i)\f$ are the feature/response pairs of the given data.
    Since the model is non-linear (otherwise, you should use ordinary \ref leastSquares()),
    it must be linearized in terms of a first-order Taylor expansion, and the optimal
    parameters <b>p</b> have to be determined iteratively. In order for the iterations to
    converge to the desired solution, a good initial guess on <b>p</b> is required.

    The model must be specified by a functor which takes one of the following forms:
    \code
    typedef double DataType; // type of your data samples, may be any numeric type
    static const int N = ...; // number of model parameters

    // variant 1: the features x are scalars
    struct UnivariateModel
    {
        template <class T>
        T operator()(DataType x, TinyVector<T, N> const & p) const { ... }
    };

    // variant 2: the features x are vectors
    struct MultivariateModel
    {
        template <class T>
        T operator()(MultiArrayView<1, DataType> const & x, TinyVector<T, N> const & p) const { ... }
    };
    \endcode
    Each call to the functor's <tt>operator()</tt> computes the model's prediction for a single data
    point. The current model parameters are specified in a TinyVector of appropriate length.
    The type <tt>T</tt> must be templated: normally, it is the same as <tt>DataType</tt>, but
    the nonlinearLeastSquares() function will temporarily replace it with a special number type
    that supports <a href="http://en.wikipedia.org/wiki/Automatic_differentiation">automatic differentiation</a>
    (see \ref vigra::autodiff::DualVector). In this way, the derivatives needed in the model's Taylor
    expansion can be computed automatically.

    When the model is univariate (has a single scalar argument), the samples must be passed to
    nonlinearLeastSquares() in a pair 'x', 'y' of 1D <tt>MultiArrayView</tt>s (variant 1).
    When the model is multivariate (has a vector-valued argument), the 'x' input must
    be a 2D <tt>MultiArrayView</tt> (variant 2) where each row represents a single data sample
    (i.e. the number of columns corresponds to the length of the model's argument vector).
    The number of rows in 'x' defines the number of data samples and must match the length
    of array 'y'.

    The <tt>TinyVector</tt> 'model_parameters' holds the initial guess for the model parameters and
    will be overwritten by the optimal values found by the algorithm. The algorithm's internal behavior
    can be controlled by customizing the option object \ref vigra::NonlinearLSQOptions.

    The function returns the residual sum of squared errors of the final solution.

    <b> Usage:</b>

    <b>\#include</b> <vigra/regression.hxx><br>
    Namespace: vigra

    Suppose that we want to fit a centered Gaussian function of the form
    \f[ f(x ; a, s, b) = a \exp\left(-\frac{x^2}{2 s^2}\right) + b \f]
    to noisy data \f$(x_i, y_i)\f$, i.e. we want to find parameters a, s, b such that
    the residual \f$\sum_i \left(y_i - f(x_i; a,s,b)\right)^2\f$ is minimized.
    The model parameters are placed in a <tt>TinyVector<T, 3></tt> <b>p</b> according to the rules<br/>
    <tt>p[0] <=> a</tt>, <tt>p[1] <=> s</tt> and <tt>p[2] <=> b</tt>.<br/> The following
    functor computes the model's prediction for a single data point <tt>x</tt>:
    \code
    struct GaussianModel
    {
        template <class T>
        T operator()(double x, TinyVector<T, 3> const & p) const
        {
            return p[0] * exp(-0.5 * sq(x / p[1])) + p[2];
        }
    };
    \endcode
    Now we can find optimal values for the parameters like this:
    \code
    int size = ...; // number of data points
    MultiArray<1, double> x(size), y(size);
    ... // fill the data arrays

    TinyVector<double, 3> p(2.0, 1.0, 0.5); // your initial guess of the parameters
                                            // (will be overwritten with the optimal values)
    double residual = nonlinearLeastSquares(x, y, p, GaussianModel());

    std::cout << "Model parameters: a=" << p[0] << ", s=" << p[1] << ", b=" << p[2] << " (residual: " << residual << ")\n";
    \endcode
*/
doxygen_overloaded_function(template <...> T nonlinearLeastSquares)

template <class T, class S1, class S2,
          class U, int N,
          class Functor>
inline T
nonlinearLeastSquares(MultiArrayView<1, T, S1> const & features,
                      MultiArrayView<1, T, S2> const & response,
                      TinyVector<U, N> & p,
                      Functor model,
                      NonlinearLSQOptions const & options = NonlinearLSQOptions())
{
    return nonlinearLeastSquaresImpl(features, response, p, model, options);
}

template <class T, class S1, class S2,
          class U, int N,
          class Functor>
inline T
nonlinearLeastSquares(MultiArrayView<2, T, S1> const & features,
                      MultiArrayView<1, T, S2> const & response,
                      TinyVector<U, N> & p,
                      Functor model,
                      NonlinearLSQOptions const & options = NonlinearLSQOptions())
{
    return nonlinearLeastSquaresImpl(features, response, p, model, options);
}

//@}

} // namespace vigra

#endif // VIGRA_REGRESSION_HXX