Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
tilt_analysis_main.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Authors: Paul Taylor */
34/* Date : Oct 95 */
35/*-----------------------------------------------------------------------*/
36/* Event RFC and Tilt labelling */
37/* */
38/*=======================================================================*/
39
40#include <cstdlib>
41#include "EST_tilt.h"
42#include "sigpr/EST_sigpr_utt.h"
43#include "EST_cmd_line_options.h"
44#include "ling_class/EST_relation_aux.h"
45#include "EST_string_aux.h"
46
47#define SIL_NAMES "sil !ENTER !EXIT"
48#define EVENT_NAMES "a rb arb m mrb"
49
50void set_fn_start(EST_Relation &ev);
51void default_rfc_params(EST_Features &op);
52void override_rfc_params(EST_Features &rfc, EST_Option &al);
53void rfc_analysis(EST_Track &fz, EST_Relation &ev, EST_Features &op);
54void change_label(EST_Relation &seg, const EST_StrList &oname,
55 const EST_String &nname);
56
57void set_options(EST_Option &al, EST_Features &op);
58
59void option_override(EST_Features &op, EST_Option al,
60 const EST_String &option, const EST_String &arg);
61
62
63
64/** @name <command>tilt_analysis</command> <emphasis>Produce tilt descriptions from F0 contours</emphasis>
65 * @id tilt_analysis-manual
66 * @toc
67 */
68
69//@{
70
71void extract_channels(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
72
73/**@name Synopsis
74 */
75//@{
76
77//@synopsis
78
79/**
tilt_analysis produces a Tilt or RFC analysis of an F0 contour, given a
label file containing a set of approximate intonational event boundaries.
82
83A detailed description of the Tilt intonation model can be found in the
84<link linkend="tilt-overview">Tilt model overview</link> section.
85
86*/
87
88//@}
89
90/**@name OPTIONS
91 */
92//@{
93
94//@options
95
96//@}
97
98
99
100int main(int argc, char *argv[])
101{
102 EST_Track fz, nfz;
103 EST_Relation ev;
104 EST_Option al;
105 EST_Features op;
106 EST_StrList files, event_list, sil_list;
107 EST_String out_file, pstring;
108 EST_Track speech, raw_fz;
109 EST_Relation sil_lab;
110 EST_Features rfc_op;
111
112 parse_command_line
113 (argc, argv,
114 EST_String("[input f0 file] -e [input event label file] -o [output file]"
115 "[options]")+
116 "Summary: produce rfc file from events and f0 contour\n"
117 "use \"-\" to make input and output files stdin/out\n"
118 "-h Options help\n\n"+
119 options_track_input()+ "\n"
120 "-event_names <string> List of labels to be classed as events. \n"
121 " Lists are specified as quoted strings with spaces \n"
122 " separating each item, e.g.: \"a b c d\"\n\n"
123 "-sil_names <string> List of labels to be classed as silence \n"
124 " Lists are specified as quoted strings with spaces \n"
125 " separating each item, e.g.: \"pau sil #\"\n\n"
126 "-e <ifile> Input event label file. This file contains \n"
127 " the list of events to be parameterized, each with its approximate \n"
128 " start and stop time marked. This file also contains silencesn \n"
129 " which are used to decide where to insert and stop phrases \n\n"
130 "-o <ofile> Output label file\n\n"
131 "-otype <string> File type of output file \n\n"
132 "-limit <float> start and stop limit in seconds. The rfc \n"
133 " matching algorithm defines a search region within which it tries \n"
134 " all possible rise and fall shapes. This option specifies how much \n"
135 " before the input label start time and how much after the input \n"
136 " label end time the search region should be. Typical value, 0.1 \n\n"
137 "-range <float> Range of RFC search region. In addition to \n"
138 " the limit, the range defines the limits of the rfc matching \n"
139 " search region as a percentage of the overall input label \n"
140 " duration. Typical value, 0.25 (the search region is the first and \n"
141 " last 25% of the label) \n\n"
142 "-smooth Smooth and Interpolate input F0 contour. \n"
143 " rfc matching can only operate on smooth fully interpolated \n"
144 " contours. This option must be used if the contour hasn't already \n"
145 " been smoothed and interpolated\n\n"
146 "-w1 <float> length in seconds of smoothing window prior\n"
147 " to interpolation. Default value 0.05 \n\n"
148 "-w2 <float> length in seconds of smoothing window after\n"
149 " to interpolation. Default value 0.05 \n\n"
150 "-sf0 <ofile> Save f0 contour that results from smoothing \n"
151 "-rfc Save as RFC parameters instead of tilt\n\n",
152 files, al);
153
154 default_rfc_params(rfc_op);
155 override_rfc_params(rfc_op, al);
156 set_options(al, op);
157
158 out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
159
160 if (read_track(nfz, files.first(), al) == -1)
161 exit(-1);
162 // REORG - extract proper f0 channel here
163 nfz.copy_sub_track(fz, 0, EST_ALL, 0, 1);
164
165 if (ev.load(al.val("-e")) != format_ok)
166 exit(-1);
167
168 pstring = (al.present("-event_names") ? al.val("-event_names"):
169 EST_String("a b ab pos"));
170 StringtoStrList(pstring, event_list);
171 convert_to_broad(ev, event_list, "int_event", 1);
172
173 // ensure all sil_names are re-written as sil
174 pstring = (al.present("-sil_names") ? al.val("-sil_names"):
175 EST_String(SIL_NAMES));
176 StringtoStrList(pstring, sil_list);
177 change_label(ev, sil_list, "sil");
178
179 if (al.present("-smooth"))
180 {
181 sil_lab = ev;
182 StringtoStrList("sil", sil_list);
183 convert_to_broad(sil_lab, sil_list, "pos", 0);
184 label_to_track(sil_lab, speech, fz.shift());
185 raw_fz = fz;
186 smooth_phrase(raw_fz, speech, op, fz);
187 }
188
189 if (al.present("-sf0"))
190 fz.save(al.val("-sf0"));
191
192 ev.f.set("name", "intevents");
193 ev.f.set("timing_style", "segment");
194
195// set_fn_start(ev);
196
197 // main RFC analysis function
198 rfc_analysis(fz, ev, rfc_op);
199
200 // convert to Tilt if necessary
201 if (!al.present("-rfc"))
202 {
203 rfc_to_tilt(ev);
204 ev.remove_item_feature("rfc");
205 }
206
207 ev.save(out_file);
208}
209
210/** @name Input Intonation Files
211
212A label file containing approximate intonational event boundaries must
213be given as input. A typical file in xlabel format is shown below:
214</para>
215<para>
216<screen>
217 0.290 146 sil
218 0.480 146 c
219 0.620 146 a
220 0.760 146 c
221 0.960 146 a
222 1.480 146 c
223 1.680 146 a
224 1.790 146 sil
225</screen>
226</para>
227<para>
The set of intonational events can be given on the command line with
the -event_names option. The default set is "a b ab pos" and so
the above example would not need the -event_names option. The label
"c" (connection) is to separate events, in effect giving each event a
start time as well as an end time. The silence labels are important
also: they specify where phrases should start and end.
234*/
235
236//@{
237//@}
238
239/** @name Input F0 Files
240
241tilt_analysis can operate on all the F0 file types supported by the
242EST library. Tilt analysis can only operate on smooth and continuous
F0 contours (i.e. F0 values must be defined during unvoiced
regions). If the input contour is not in this format, use the -smooth
245option. The -w1 and -w2 options can be used to control the amount of
246smoothing. The smoothed version of the input contour can be examined
247by saving it using the -sf0 option.
248
249*/
250
251//@{
252//@}
253
254/** @name Output Intonation Files
255
256The output will be a label file containing the tilt parameters for the
257events in feature format. An example, in xlabel format, is shown below:
258</para>
259<para>
260<screen>
261intonation_style tilt
262#
2630.29 26 phrase_start ; ev.f0 115.234 ; time 0.29 ;
2640.53 26 a ; int_event 1 ; ev.f0 118.171 ; time 0.53 ; tilt.amp 21.8602 ;
265 tilt.dur 0.26 ; tilt.tilt -0.163727 ;
2660.77 26 a ; int_event 1 ; ev.f0 112.694 ; time 0.77 ; tilt.amp 27.0315 ;
267 tilt.dur 0.32 ; tilt.tilt -0.446791 ;
2681.53 26 a ; int_event 1 ; ev.f0 100.83 ; time 1.53 ; tilt.amp 7.507 ;
269 tilt.dur 0.22 ; tilt.tilt -0.296317 ;
2701.79 26 phrase_end ; ev.f0 92.9785 ; time 1.79 ;
271</screen>
272</para>
273<para>
274The -rfc option will make a file containing the RFC parameters instead:
275</para>
276<para>
277<screen>
278intonation_style rfc
279#
2800.29 26 phrase_start ; ev.f0 115.234 ; time 0.29 ;
2810.53 26 a ; ev.f0 118.171 ; rfc.rise_amp 8.19178 ; rfc.rise_dur 0.12 ;
282 rfc.fall_amp -13.6684 ; rfc.fall_dur 0.14 ; time 0.53 ;
283 0.77 26 a ; ev.f0 112.694 ; rfc.rise_amp 6.50673 ; rfc.rise_dur 0.1 ;
284 rfc.fall_amp -20.5248 ; rfc.fall_dur 0.22 ; time 0.77 ;
2851.53 26 a ; ev.f0 100.83 ; rfc.rise_amp 1.55832 ; rfc.rise_dur 0.11 ;
286 rfc.fall_amp -6.09238 ; rfc.fall_dur 0.11 ; time 1.53 ;
2871.79 26 phrase_end ; ev.f0 92.9785 ; time 1.79 ;
288</screen>
289</para>
290<para>
291The feature in the header, "intonation_style tilt" or
292"intonation_style rfc" is needed for the tilt_synthesis program to
293work.
294
295*/
296
297//@{
298//@}
299
300//@}
301
302
303void override_rfc_params(EST_Features &rfc, EST_Option &al)
304{
305 if (al.present("-limit"))
306 {
307 rfc.set("start_limit", al.fval("-limit"));
308 rfc.set("stop_limit", al.fval("-limit", 0));
309 }
310 if (al.present("-range"))
311 rfc.set("range", al.fval("-range"));
312 if (al.present("-min_dur"))
313 rfc.set("min_event_duration", al.fval("-min_dur"));
314}
315
316void set_options(EST_Option &al, EST_Features &op)
317{
318 // Nobody else has set window_length or second_length so
319 // set defaults here
320 op.set("window_length",0.05);
321 op.set("second_length",0.05);
322 option_override(op, al, "window_length", "-w1");
323 option_override(op, al, "second_length", "-w2");
324}
void set(const EST_String &name, int ival)
float fval(const EST_String &rkey, int m=1) const
Definition EST_Option.cc:98
void remove_item_feature(const EST_String &name)
EST_Features f
EST_read_status load(const EST_String &filename, const EST_String &type="esps")
EST_write_status save(const EST_String &filename, bool evaluate_ff=false) const
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition EST_TKVL.cc:222
const T & first() const
return const reference to first item in list
Definition EST_TList.h:146
void copy_sub_track(EST_Track &st, int start_frame=0, int nframes=EST_ALL, int start_chan=0, int nchans=EST_ALL) const
EST_write_status save(const EST_String name, const EST_String EST_filetype="")
float shift() const
Definition EST_Track.cc:599