Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
ols_main.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996,1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : January 1998 */
35/*-----------------------------------------------------------------------*/
36/* Ordinary least squares */
37/* */
38/*=======================================================================*/
39#include <cstdlib>
40#include <iostream>
41#include <fstream>
42#include <cstring>
43#include "EST_Wagon.h"
44#include "EST_multistats.h"
45#include "EST_cmd_line.h"
46
47static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d);
48static int ols_main(int argc, char **argv);
49
50
51/** @name <command>ols</command> <emphasis>Train linear regression model</emphasis>
52 @id ols-manual
53 * @toc
54 */
55
56//@{
57
58
59/**@name Synopsis
60 */
61//@{
62
63//@synopsis
64
65/**
66 */
67
68//@}
69
70/**@name OPTIONS
71 */
72//@{
73
74//@options
75
76//@}
77
78
79int main(int argc, char **argv)
80{
81 return ols_main(argc,argv);
82}
83
84static int ols_main(int argc, char **argv)
85{
86 // Top level function loads in sample data and finds coefficients
87 EST_Option al;
88 EST_StrList files;
89 EST_String ofile = "-";
90 WDataSet dataset,test_dataset;
91 EST_FMatrix coeffs;
92 EST_FMatrix X,Y,Xtest,Ytest;
93 LISP ignores = NIL;
94
95 parse_command_line
96 (argc, argv,
97 EST_String("[options]\n")+
98 "Summary: Linear Regression by ordinary least squares (defaults in {})\n"+
99 "-desc <ifile> Field description file\n"+
100 "-data <ifile> Datafile, one vector per line\n"+
101 "-test <ifile> Datafile, for testing\n"+
102 "-robust Robust, may take longer\n"+
103 "-stepwise Order the features by contribution,\n"+
104 " implies robust.\n"+
105 "-swlimit <float> {0.0}\n"+
106 " Percentage necessary improvement for stepwise\n"+
107 "-quiet No summary\n"+
108 "-o <ofile> \n"+
109 "-output <ofile> Output file for coefficients\n"+
110 "-ignore <string> Filename or bracket list of fields to ignore\n",
111 files, al);
112
113
114 if (al.present("-output"))
115 ofile = al.val("-output");
116 if (al.present("-o"))
117 ofile = al.val("-o");
118
119 siod_init();
120
121 if (al.present("-ignore"))
122 {
123 EST_String ig = al.val("-ignore");
124 if (ig[0] == '(')
125 ignores = read_from_string(ig);
126 else
127 ignores = vload(ig,1);
128 }
129
130 // Load in the data
131 if (!al.present("-desc"))
132 {
133 cerr << "ols: no description file specified\n";
134 return -1;
135 }
136 else
137 {
138 dataset.load_description(al.val("-desc"),ignores);
139 dataset.ignore_non_numbers();
140 }
141 if (!al.present("-data"))
142 {
143 cerr << "ols: no data file specified\n";
144 return -1;
145 }
146 else
147 wgn_load_dataset(dataset,al.val("-data"));
148 if (al.present("-test"))
149 {
150 test_dataset.load_description(al.val("-desc"),ignores);
151 test_dataset.ignore_non_numbers();
152 wgn_load_dataset(test_dataset,al.val("-test"));
153 load_ols_data(Xtest,Ytest,test_dataset);
154 }
155 else
156 // No test data specified so use training data
157 load_ols_data(Xtest,Ytest,dataset);
158
159 load_ols_data(X,Y,dataset);
160
161 if (al.present("-stepwise"))
162 {
163 EST_StrList names;
164 float swlimit = al.fval("-swlimit");
165 EST_IVector included;
166 int i;
167
168 names.append("Intercept");
169 for (i=1; i < dataset.width(); i++)
170 names.append(dataset.feat_name(i));
171
172 included.resize(X.num_columns());
173 included[0] = TRUE; // always guarantee interceptor
174 for (i=1; i<included.length(); i++)
175 {
176 if (dataset.ignore(i) == TRUE)
177 included.a_no_check(i) = OLS_IGNORE;
178 else
179 included.a_no_check(i) = FALSE;
180 }
181
182 if (!stepwise_ols(X,Y,names,swlimit,coeffs,Xtest,Ytest,included))
183 {
184 cerr << "OLS: failed stepwise ols" << endl;
185 return -1;
186 }
187 }
188 else if (al.present("-robust"))
189 {
190 EST_IVector included;
191 int i;
192
193 included.resize(X.num_columns());
194 included[0] = TRUE; // always guarantee interceptor
195 for (i=1; i<included.length(); i++)
196 {
197 if (dataset.ignore(i) == TRUE)
198 included.a_no_check(i) = OLS_IGNORE;
199 else
200 included.a_no_check(i) = TRUE;
201 }
202
203 if (!robust_ols(X,Y,included,coeffs))
204 {
205 cerr << "OLS: failed robust ols" << endl;
206 return -1;
207 }
208 }
209 else if (!ols(X,Y,coeffs))
210 {
211 cerr << "OLS: failed no pseudo_inverse" << endl;
212 return -1;
213 }
214
215 if (coeffs.save(ofile) != write_ok)
216 {
217 cerr << "OLS: failed to save coefficients in \"" << ofile << "\""
218 << endl;
219 return -1;
220 }
221
222 if (!al.present("-quiet"))
223 {
224 EST_FMatrix pred;
225 float cor,rmse;
226
227 ols_apply(Xtest,coeffs,pred);
228 ols_test(Ytest,pred,cor,rmse);
229
230 printf(";; RMSE %f Correlation is %f\n",rmse,cor);
231 }
232
233 return 0;
234}
235
236static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d)
237{
238 EST_Litem *p;
239 int n,m;
240
241 X.resize(d.length(),d.width());
242 Y.resize(d.length(),1);
243
244 for (n=0,p=d.head(); p != 0; p=p->next(),n++)
245 {
246 Y.a_no_check(n,0) = d(p)->get_flt_val(0);
247 X.a_no_check(n,0) = 1;
248 for (m=1; m < d.width(); m++)
249 {
250 if (d.ignore(m))
251 {
252 X.a_no_check(n,m) = 0;
253 }
254 else
255 X.a_no_check(n,m) = d(p)->get_flt_val(m);
256 }
257 }
258
259}
EST_write_status save(const EST_String &filename, const EST_String &type=EST_FMatrix::default_file_type)
Save in file (ascii or binary)
float fval(const EST_String &rkey, int m=1) const
Definition EST_Option.cc:98
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition EST_TKVL.cc:222
void append(const T &item)
add item onto end of list
Definition EST_TList.h:191
INLINE const T & a_no_check(int row, int col) const
const access with no bounds check; care recommended
void resize(int rows, int cols, int set=1)
resize matrix
void resize(int n, int set=1)
resize vector
INLINE int length() const
number of items in vector.
INLINE const T & a_no_check(int n) const
read-only const access operator: without bounds checking