Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
sigpr_utt.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Authors: Paul Taylor and Simon King */
34/* Date : March 1998 */
35/*-----------------------------------------------------------------------*/
36/* Signal processing functions which operate on entire utterances */
37/* */
38/*=======================================================================*/
39
40
41#include "EST_error.h"
42#include "EST_track_aux.h"
43#include "EST_inline_utils.h"
44#include "sigpr/EST_fft.h"
45#include "sigpr/EST_sigpr_frame.h"
46#include "sigpr/EST_sigpr_utt.h"
47
48#include "EST_Features.h"
49#include "EST_types.h"
50#include "EST_string_aux.h"
51
52void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
53 const EST_StrList &slist);
54
55void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
56 const EST_StrList &slist);
57
58
59
60static void parse_op_settings(EST_Features &op, EST_WindowFunc *&wf, float &f)
61{
62 EST_String w_name;
63
64 if (op.present("window_type"))
65 w_name = op.S("window_type");
66 else
67 w_name = DEFAULT_WINDOW_NAME;
68 wf = EST_Window::creator(w_name);
69
70 f = op.present("frame_factor") ? op.F("frame_factor")
71 : DEFAULT_FRAME_FACTOR;
72}
73
74void add_channels_to_map(EST_StrList &map, EST_StrList &types,
75 EST_Features &op, int delta_order)
76{
77 EST_String t;
78 EST_String dos;
79
80 if (delta_order == 0)
81 dos = "";
82 else if (delta_order == 1)
83 dos = "_d";
84 else if (delta_order == 2)
85 dos = "_a";
86 else
87 EST_error("Requested delta order too high: %d\n", delta_order);
88
89
90
91 for (EST_Litem *s = types.head(); s; s = s->next())
92 {
93 t = types(s);
94 if (op.present(t + "_order"))
95 {
96 int actual_order = op.I(t + "_order");
97 if(actual_order < 1)
98 {
99 cerr << "Invalid " << t << "_order" << " : ";
100 cerr << actual_order;
101 cerr << " (using 1 instead) " << endl;
102 actual_order = 1;
103 }
104
105 int lowest_coef=0,highest_coef=actual_order-1;
106
107 if(t == "lpc")
108 // For lpc coefficients, we ALWAYS include energy as the
109 // 0th coefficient, so when the users gives lpc_order of
110 // 16, we produce 17 coefficients (0 to 16)
111 highest_coef=actual_order;
112
113
114 if(t == "melcep")
115 {
116 // Mel cepstra have special names - if we are not
117 // including c0, then the coefficients are numbered
118 // 1...order, and NOT 0...order-1
119 highest_coef=actual_order;
120 if(op.present("include_c0"))
121 lowest_coef = 0;
122 else
123 lowest_coef = 1;
124 }
125
126 if(actual_order == 1)
127 map.append(t + dos);
128 else
129 map.append("$" + t + dos + "-"+itoString(lowest_coef)+"+"+itoString(highest_coef));
130 }
131 else
132 map.append(t + dos);
133 }
134}
135
136void sigpr_base(EST_Wave &sig, EST_Track &fv, EST_Features &op,
137 const EST_StrList &slist)
138{
139 EST_Track fill, tmp;
140 EST_String b_name;
141 EST_String k;
142 float frame_factor;
143 EST_WindowFunc *wf;
144
145 int fbank_order;
146 float liftering_parameter=0;
147 bool use_power_rather_than_energy=false, take_logs=true, include_c0=false;
148
149 parse_op_settings(op, wf, frame_factor);
150
151 for (EST_Litem *s = slist.head(); s; s = s->next())
152 {
153 k = slist(s);
154
155 EST_String start_channel="0";
156 if( (slist(s) == "melcep") && !op.present("include_c0"))
157 start_channel = "1";
158
159 if (fv.has_channel(k))
160 fv.sub_track(fill, 0, EST_ALL, k , 1);
161 else
162 fv.sub_track(fill, 0, EST_ALL, k + "_" + start_channel, k + "_N");
163
164 if(op.present("usepower"))
165 cerr << "USING POWER" << endl;
166
167 if ((slist(s) == "lpc") || (slist(s) == "cep")
168 ||(slist(s) == "ref") || (slist(s) == "lsf"))
169 sig2coef(sig, fill, slist(s), frame_factor, wf);
170 else if (slist(s) == "power")
171 power(sig, fill, frame_factor);
172 else if (slist(s) == "energy")
173 energy(sig, fill, frame_factor);
174 else if (slist(s) == "f0")
175 {
176 op.set("srpd_resize", 0);
177 op.set("pda_frame_shift", op.F("frame_shift"));
178 pda(sig, fill, op, "srpd");
179 }
180// else if (slist(s) == "rasta")
181// rasta(sig, fill, op);
182
183 else if (slist(s) == "fbank")
184 {
185 use_power_rather_than_energy = op.present("usepower");
186 fbank(sig, fill, frame_factor, wf, use_power_rather_than_energy,
187 take_logs);
188 }
189
190 else if (slist(s) == "melcep")
191 {
192 fbank_order=op.I("fbank_order");
193 use_power_rather_than_energy = op.present("usepower");
194 include_c0=op.present("include_c0");
195
196 if(op.present("lifter"))
197 liftering_parameter=op.F("lifter");
198
199 //cerr << "calling melcep " << fill.num_channels() << endl;
200
201 melcep(sig, fill, frame_factor, fbank_order,
202 liftering_parameter, wf, include_c0,
203 use_power_rather_than_energy);
204 }
205 else
206 EST_error("Error: Unnknown type of processing requested: %s\n",
207 ((const char*) slist(s)));
208 }
209}
210
211void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
212 const EST_String &k)
213{
214 EST_Track base, fill;
215
216// cout << "type: " << k << endl;
217
218 // look to see if base coefficients already exist
219 EST_String start_channel="0";
220 if( (k == "melcep") && !op.present("include_c0"))
221 start_channel = "1";
222
223 if (fv.has_channel(k))
224 fv.sub_track(base, 0, EST_ALL, k , 1);
225 else if (fv.has_channel(k + "_" + start_channel))
226 fv.sub_track(base, 0, EST_ALL, k + "_" + start_channel, k + "_N");
227 else // otherwise make them in temporary track
228 {
229// cout << "making tmp cpoefs\n";
230 EST_StrList tmp_base, tmp_map;
231 tmp_base.append(k);
232 add_channels_to_map(tmp_map, tmp_base, op, 0);
233 base.resize(fv.num_frames(), tmp_map);
234
235 base.fill_time(fv);
236
237 base.set_equal_space(false);
238 sigpr_base(sig, base, op, tmp_base);
239// cout << "BASE\n" << base;
240// cout <<"after\n";
241 }
242
243 if (fv.has_channel(k + "_d"))
244 fv.sub_track(fill, 0, EST_ALL, k+"_d", 1);
245 else
246 fv.sub_track(fill, 0, EST_ALL, k+"_d_" + start_channel, k+"_d_N");
247
248/* cout << "base\n";
249 track_info(base);
250 cout << "fill\n";
251 track_info(fill);
252*/
253
254 delta(base, fill);
255}
256
257void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
258 const EST_String &k)
259{
260 EST_Track base, fill;
261
262// cout << endl << endl << "acc\n";
263
264// cout << "type: " << k << endl;
265
266 // look to see if delta coefficients already exist
267 EST_String start_channel="0";
268 if( (k == "melcep") && !op.present("include_c0"))
269 start_channel = "1";
270 if (fv.has_channel(k+"_d"))
271 fv.sub_track(base, 0, EST_ALL, k + "_d", 1);
272 else if (fv.has_channel(k + "_d_" + start_channel))
273 fv.sub_track(base, 0, EST_ALL, k + "_d_" + start_channel, k + "_d_N");
274 else // otherwise make them in temporary track
275 {
276 EST_StrList tmp_base, tmp_map;
277 tmp_base.append(k);
278 add_channels_to_map(tmp_map, tmp_base, op, 1);
279 base.resize(fv.num_frames(), tmp_map);
280
281 base.fill_time(fv);
282
283 base.set_equal_space(false);
284 sigpr_delta(sig, base, op, tmp_base);
285 }
286
287 if (fv.has_channel(k + "_a"))
288 fv.sub_track(fill, 0, EST_ALL, k+"_a", 1);
289 else
290 fv.sub_track(fill, 0, EST_ALL, k+"_a_" + start_channel, k+"_a_N");
291
292// cout << "base\n";
293// track_info(base);
294// cout << "fill\n";
295// track_info(fill);
296
297 delta(base, fill);
298}
299
300void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
301 const EST_StrList &slist)
302{
303 for (EST_Litem *s = slist.head(); s; s = s->next())
304 sigpr_acc(sig, fv, op, slist(s));
305}
306
307void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
308 const EST_StrList &slist)
309{
310 for (EST_Litem *s = slist.head(); s; s = s->next())
311 sigpr_delta(sig, fv, op, slist(s));
312}
313
314
315int get_frame_size(EST_Track &pms,
316 int i, int sample_rate, int prefer_prev)
317{
318 int prev = -1;
319 int next = -1;
320
321 if (i>0)
322 prev = irint((pms.t(i) - pms.t(i-1))*sample_rate);
323 if (i<pms.num_frames()-1)
324 next = irint((pms.t(i+1) - pms.t(i))*sample_rate);
325
326 if (prefer_prev)
327 return prev>=0?prev:(next>=0?next:0);
328 return next>=0?next:(prev>=0?prev:0);
329}
330
331float get_time_frame_size(EST_Track &pms, int i, int prefer_prev)
332{
333 float prev = -1;
334 float next = -1;
335
336 if (i > 0)
337 prev = pms.t(i) - pms.t(i-1);
338 if (i < pms.num_frames() -1)
339 next = pms.t(i+1) - pms.t(i);
340
341 if (prefer_prev)
342 return prev>=0 ? prev: (next>=0 ? next : 0.0);
343 return next>=0 ? next: (prev>=0 ? prev : 0.0);
344}
345
346/*void sig2lpc(EST_Wave &sig, EST_Track &lpc, EST_WindowFunc *wf, float factor)
347{
348 int order = lpc.num_channels() - 1;
349 EST_FVector coefs(order + 1);
350 int k;
351 int window_start, window_size, length; // can be merged with window_size
352
353 int sample_rate = sig.sample_rate();
354
355 EST_FVector frame;
356
357 for (k = 0; k < lpc.num_frames(); ++k)
358 {
359 int pos = irint(lpc.t(k) * sample_rate);
360
361 length = get_local_frame_size(lpc, k, sig.sample_rate());
362 window_size = irint(length * factor);
363 window_start = pos - (window_size/2);
364
365 EST_Window::window_signal(sig, wf, window_start,
366 window_size, frame, 1);
367
368 lpc.frame(coefs, k);
369 sig2lpc(frame, coefs);
370 }
371 lpc.save("test.est", "est");
372}
373*/
374
375/*typedef void EST_FrameFunc(const EST_FVector &in_frame,
376 EST_FVector &out_frame);
377
378void sig2coef(EST_Wave &sig, EST_Track &lpc, EST_WindowFunc *wf,
379 EST_FrameFunc *ff, float factor)
380{
381 EST_FVector coefs, frame;
382 int start, size;
383
384 for (int k = 0; k < lpc.num_frames(); ++k)
385 {
386 size = irint(get_local_frame_size(lpc, k, sig.sample_rate())* factor);
387 start = (irint(lpc.t(k) * sig.sample_rate()) - (size/2));
388
389 EST_Window::window_signal(sig, wf, start, size, frame, 1);
390
391 lpc.frame(coefs, k);
392 (*ff)(frame, coefs);
393 }
394}
395*/
396
397void sig2coef(EST_Wave &sig, EST_Track &tr, EST_String type,
398 float factor, EST_WindowFunc *wf)
399{
400 EST_FVector coefs, frame;
401 int start, size;
402
403// cout << "TYPE IS " << type << endl;
404
405 for (int k = 0; k < tr.num_frames(); ++k)
406 {
407 if (factor < 0) // want fixed frame rate
408 size = (int)(-1.0 * factor * (float)sig.sample_rate());
409 else
410 size = irint(get_frame_size(tr, k, sig.sample_rate())* factor);
411 start = (irint(tr.t(k) * sig.sample_rate()) - (size/2));
412
413 EST_Window::window_signal(sig, wf, start, size, frame, 1);
414
415 tr.frame(coefs, k);
416 frame_convert(frame, "sig", coefs, type);
417 }
418}
419
420void power(EST_Wave &sig, EST_Track &pow, float factor)
421{
422 EST_FVector frame;
423 int window_start, window_size, pos, k;
424
425 EST_WindowFunc *wf = EST_Window::creator("rectangular");
426
427 for (k = 0; k < pow.num_frames(); ++k)
428 {
429 pos = irint(pow.t(k) * sig.sample_rate());
430 if (factor < 0) // want fixed frame rate
431 window_size = (int)(-1.0 * factor * (float)sig.sample_rate());
432 else
433 window_size = irint(get_frame_size(pow, k, sig.sample_rate())
434 * factor);
435 window_start = pos - window_size/2;
436 EST_Window::window_signal(sig, wf, window_start, window_size,frame, 1);
437
438 sig2pow(frame, pow.a(k));
439 }
440}
441
442void energy(EST_Wave &sig, EST_Track &pow, float factor)
443{
444 EST_FVector frame;
445 int window_start, window_size, pos, k;
446
447 EST_WindowFunc *wf = EST_Window::creator("rectangular");
448
449 for (k = 0; k < pow.num_frames(); ++k)
450 {
451 pos = irint(pow.t(k) * sig.sample_rate());
452 if (factor < 0) // want fixed frame rate
453 window_size = (int)(-1.0 * factor * (float)sig.sample_rate());
454 else
455 window_size = irint(get_frame_size(pow, k, sig.sample_rate())
456 * factor);
457 window_start = pos - window_size/2;
458 EST_Window::window_signal(sig, wf, window_start, window_size,frame,1);
459
460 sig2rms(frame, pow.a(k));
461 }
462}
463
464static EST_String determine_type(const EST_String &intype)
465{
466 return (intype.contains("_") ? intype.before("_"): intype);
467}
468
469void convert_track(EST_Track &in_track, EST_Track &out_track,
470 const EST_String &out_type, const EST_String &in_type)
471{
472 if (in_track.num_frames() != out_track.num_frames())
473 EST_error("In track has %d frames, out track has %d\n",
474 in_track.num_frames(), out_track.num_frames());
475
476 EST_String tmp;
477 tmp = ((in_type == "") ? determine_type(in_track.channel_name(0)):in_type);
478
479 EST_FVector in_frame(in_track.num_channels());
480 EST_FVector out_frame(out_track.num_channels());
481
482 for (int i = 0; i < in_track.num_frames(); ++i)
483 {
484 in_track.frame(in_frame, i);
485 out_track.frame(out_frame, i);
486 frame_convert(in_frame, tmp, out_frame, out_type);
487 }
488}
489
490
491
492void fbank(EST_Wave &sig,
493 EST_Track &fbank_track,
494 const float factor,
495 EST_WindowFunc *wf,
496 const bool use_power_rather_than_energy,
497 const bool take_log)
498{
499
500 // still to add : high/low pass filtering
501
502 int window_start, window_size, pos, k;
503 EST_FVector frame,fbank_frame;
504
505 // get_order(...) gives wrong answer ... Paul ?
506 int fbank_order = fbank_track.num_channels();
507
508 // sanity check
509 if(fbank_order < 1)
510 {
511 EST_error("Filterbank order of %i makes no sense.\n",fbank_order);
512 return;
513 }
514
515 for (k = 0; k < fbank_track.num_frames(); ++k)
516 {
517 if (factor < 0) // want fixed frame rate
518 window_size = (int)(-1.0 * factor * (float)sig.sample_rate());
519 else
520 window_size = irint(get_frame_size(fbank_track, k, sig.sample_rate())
521 * factor);
522 pos = irint(fbank_track.t(k) * sig.sample_rate());
523 window_start = pos - window_size/2;
524 EST_Window::window_signal(sig, wf, window_start, window_size,frame, 1);
525
526 fbank_track.frame(fbank_frame,k);
527 sig2fbank(frame,fbank_frame,sig.sample_rate(),
528 use_power_rather_than_energy,take_log);
529
530 }
531
532
533}
534
535
536void melcep(EST_Wave &sig, EST_Track &mfcc_track,
537 float factor,
538 int fbank_order,
539 float liftering_parameter,
540 EST_WindowFunc *wf,
541 const bool include_c0,
542 const bool use_power_rather_than_energy)
543{
544
545 EST_FVector frame,mfcc_frame,fbank_frame;
546 int k;
547
548 // first, do filterbank analysis
549 // need a temporary track, with the same setup as mfcc_track
550 EST_Track fbank_track;
551
552// cout << "MELPCEP\n" << fbank_order << endl;
553
554 fbank_track.resize(mfcc_track.num_frames(), fbank_order);
555 fbank_track.fill_time(mfcc_track);
556 fbank_track.set_equal_space(false);
557
558 // temp removed by pault 24/02/99
559// make_timed_track(mfcc_track, fbank_track, "filter", fbank_order, 0);
560
561 // 'true' makes fbank(...) take logs
562 fbank(sig, fbank_track, factor, wf, use_power_rather_than_energy, true);
563
564 /*
565 if(include_c0)
566 cerr << "melcep c0" << endl;
567 else
568 cerr << "melcep no c0" << endl;
569 */
570 for (k = 0; k < mfcc_track.num_frames(); ++k)
571 {
572
573 mfcc_track.frame(mfcc_frame,k);
574 fbank_track.frame(fbank_frame,k);
575
576 fbank2melcep(fbank_frame, mfcc_frame,liftering_parameter,include_c0);
577 }
578}
void set(const EST_String &name, int ival)
const float F(const EST_String &path) const
const EST_String S(const EST_String &path) const
int present(const EST_String &name) const
const int I(const EST_String &path) const
EST_String before(int pos, int len=0) const
Part before position.
Definition EST_String.h:286
int contains(const char *s, int pos=-1) const
Does it contain this substring?
Definition EST_String.h:375
void append(const T &item)
add item onto end of list
Definition EST_TList.h:191
float & t(int i=0)
return time position of frame i
Definition EST_Track.h:477
float & a(int i, int c=0)
void sub_track(EST_Track &st, int start_frame=0, int nframes=EST_ALL, int start_chan=0, int nchans=EST_ALL)
bool has_channel(const char *name) const
Definition EST_Track.h:384
int num_channels() const
return number of channels in track
Definition EST_Track.h:656
int num_frames() const
return number of frames in track
Definition EST_Track.h:650
void resize(int num_frames, int num_channels, bool preserve=1)
Definition EST_Track.cc:211
void fill_time(float t, int start=1)
Definition EST_Track.cc:786
void frame(EST_FVector &fv, int n, int startf=0, int nf=EST_ALL)
Definition EST_Track.h:209
int sample_rate() const
return the sampling rate (frequency)
Definition EST_Wave.h:147
static void window_signal(const EST_Wave &sig, EST_WindowFunc *make_window, int start, int size, EST_TBuffer< float > &frame)
static Func * creator(const char *name, bool report_error=false)
Return the creation function for the given window type.