Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_SCFG.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : October 1997 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* A class for representing Stochastic Context Free Grammars */
38/* */
39/*=======================================================================*/
40#include <iostream>
41#include "EST_Pathname.h"
42#include "EST_SCFG.h"
43
44EST_SCFG_Rule::EST_SCFG_Rule(double prob,int p, int m)
45{
46 set_rule(prob,p,m);
47}
48
49EST_SCFG_Rule::EST_SCFG_Rule(double prob,int p, int q, int r)
50{
51 set_rule(prob,p,q,r);
52}
53
54void EST_SCFG_Rule::set_rule(double prob,int p, int m)
55{
56 p_prob = prob;
57 p_mother = p;
58 p_daughter1 = m;
59 p_type = est_scfg_unary_rule;
60}
61
62void EST_SCFG_Rule::set_rule(double prob,int p, int q, int r)
63{
64 p_prob = prob;
65 p_mother = p;
66 p_daughter1 = q;
67 p_daughter2 = r;
68 p_type = est_scfg_binary_rule;
69}
70
71EST_SCFG::EST_SCFG()
72{
73 p_prob_B=0;
74 p_prob_U=0;
75}
76
77EST_SCFG::EST_SCFG(LISP rs)
78{
79 p_prob_B=0;
80 p_prob_U=0;
81 set_rules(rs);
82}
83
84EST_SCFG::~EST_SCFG(void)
85{
86
87 delete_rule_prob_cache();
88
89}
90
92{
93 // Cummulate the nonterminals and terminals
94 LISP r;
95
96 for (r=rs; r != NIL; r=cdr(r))
97 {
98 LISP p = car(cdr(car(r)));
99 if (!strlist_member(nt,get_c_string(p)))
100 nt.append(get_c_string(p));
101 if (siod_llength(car(r)) == 3) // unary rule
102 {
103 LISP d = car(cdr(cdr(car(r))));
104 if (!strlist_member(t,get_c_string(d)))
105 t.append(get_c_string(d));
106 }
107 else // binary rules
108 {
109 LISP d1 = car(cdr(cdr(car(r))));
110 LISP d2 = car(cdr(cdr(cdr(car(r)))));
111 if (!strlist_member(nt,get_c_string(d1)))
112 nt.append(get_c_string(d1));
113 if (!strlist_member(nt,get_c_string(d2)))
114 nt.append(get_c_string(d2));
115 }
116 }
117
118}
119
120void EST_SCFG::set_rules(LISP lrules)
121{
122 // Initialise rule base from Lisp form
123 LISP r;
124 EST_StrList nt_list, term_list;
125
126 rules.clear();
127 delete_rule_prob_cache();
128
129 find_terms_nonterms(nt_list,term_list,lrules);
130 nonterminals.init(nt_list);
131 terminals.init(term_list);
132
133 if (!consp(car(cdr(car(lrules)))))
134 p_distinguished_symbol =
135 nonterminal(get_c_string(car(cdr(car(lrules)))));
136 else
137 cerr << "SCFG: no distinguished symbol" << endl;
138
139 for (r=lrules; r != NIL; r=cdr(r))
140 {
141 if ((siod_llength(car(r)) < 3) ||
142 (siod_llength(car(r)) > 4) ||
143 (!numberp(car(car(r)))))
144 cerr << "SCFG rule is malformed" << endl;
145// est_error("SCFG rule is malformed\n");
146 else
147 {
148 EST_SCFG_Rule rule;
149 if (siod_llength(car(r)) == 3)
150 {
151 int m = nonterminal(get_c_string(car(cdr(car(r)))));
152 int d = terminal(get_c_string(car(cdr(cdr(car(r))))));
153 rule.set_rule(get_c_float(car(car(r))),m,d);
154 }
155 else
156 {
157 int p = nonterminal(get_c_string(car(cdr(car(r)))));
158 int d1=nonterminal(get_c_string(car(cdr(cdr(car(r))))));
159 int d2 = nonterminal(get_c_string(car(cdr(cdr(cdr(car(r)))))));
160 rule.set_rule(get_c_float(car(car(r))),p,d1,d2);
161 }
162 rules.append(rule);
163 }
164 }
165
166 rule_prob_cache();
167}
168
170{
171 // Return LISP form of rules
172 EST_Litem *p;
173 LISP r;
174
175 for (r=NIL,p=rules.head(); p != 0; p=p->next())
176 {
177 if (rules(p).type() == est_scfg_unary_rule)
178 r = cons(cons(flocons(rules(p).prob()),
179 cons(rintern(nonterminal(rules(p).mother())),
180 cons(rintern(terminal(rules(p).daughter1())),NIL))),
181 r);
182 else if (rules(p).type() == est_scfg_binary_rule)
183 r = cons(cons(flocons(rules(p).prob()),
184 cons(rintern(nonterminal(rules(p).mother())),
185 cons(rintern(nonterminal(rules(p).daughter1())),
186 cons(rintern(nonterminal(rules(p).daughter2())),
187 NIL)))),
188 r);
189 }
190 return reverse(r);
191}
192
193EST_read_status EST_SCFG::load(const EST_String &filename)
194{
195 LISP rs;
196
197 rs = vload(filename,1);
198
199 set_rules(rs);
200
201 return format_ok;
202}
203
204EST_write_status EST_SCFG::save(const EST_String &filename)
205{
206 EST_Pathname outfile(filename);
207 FILE *fd;
208 LISP r;
209
210 if (outfile == "-")
211 fd = stdout;
212 else
213 {
214 if ((fd=fopen(outfile,"w")) == NULL)
215 {
216 cerr << "scfg_train: failed to open file \"" << outfile <<
217 "\" for writing" << endl;
218 return misc_write_error;
219 }
220 }
221
222 for (r=get_rules(); r != NIL; r=cdr(r))
223 pprint_to_fd(fd,car(r));
224
225 if (fd != stdout)
226 fclose(fd);
227
228 return write_ok;
229}
230
231
232void EST_SCFG::rule_prob_cache()
233{
234 // Build access cache for the probabilities of binary rules
235 // This will have to made much more efficient
236 int i,j;
237
238 p_prob_B = new double**[num_nonterminals()];
239 p_prob_U = new double*[num_nonterminals()];
240 for (i=0; i < num_nonterminals(); i++)
241 {
242 p_prob_B[i] = new double*[num_nonterminals()];
243 p_prob_U[i] = new double[num_terminals()];
244 memset(p_prob_U[i],0,sizeof(double)*num_terminals());
245 for (j=0; j < num_nonterminals(); j++)
246 {
247 p_prob_B[i][j] = new double[num_nonterminals()];
248 memset(p_prob_B[i][j],0,sizeof(double)*num_nonterminals());
249 }
250 }
251
253
254}
255
257{
258 EST_Litem *pp;
259
260 for (pp=rules.head(); pp != 0; pp = pp->next())
261 {
262 if (rules(pp).type() == est_scfg_binary_rule)
263 {
264 int p = rules(pp).mother();
265 int q = rules(pp).daughter1();
266 int r = rules(pp).daughter2();
267 p_prob_B[p][q][r] = rules(pp).prob();
268 }
269 else if (rules(pp).type() == est_scfg_unary_rule)
270 {
271 int p = rules(pp).mother();
272 int m = rules(pp).daughter1();
273 p_prob_U[p][m] = rules(pp).prob();
274 }
275 }
276}
277
278void EST_SCFG::delete_rule_prob_cache()
279{
280 int i,j;
281
282 if (p_prob_B == 0)
283 return;
284
285 for (i=0; i < num_nonterminals(); i++)
286 {
287 for (j=0; j < num_nonterminals(); j++)
288 delete [] p_prob_B[i][j];
289 delete [] p_prob_B[i];
290 delete [] p_prob_U[i];
291 }
292 delete [] p_prob_B;
293 delete [] p_prob_U;
294
295 p_prob_B = 0;
296 p_prob_U = 0;
297}
298
299ostream &operator << (ostream &s, const EST_SCFG_Rule &rule)
300{
301 (void)rule;
302 return s << "<<EST_SCFG_Rule>>";
303}
304
305Declare_TList(EST_SCFG_Rule)
306#if defined(INSTANTIATE_TEMPLATES)
307#include "../base_class/EST_TList.cc"
308#include "../base_class/EST_TSortable.cc"
309
310Instantiate_TList(EST_SCFG_Rule)
311#endif
312
bool init(const EST_StrList &vocab)
(re-)initialise
double prob() const
The rule's probability.
Definition EST_SCFG.h:140
EST_String nonterminal(int p) const
Convert nonterminal index to string form.
Definition EST_SCFG.h:214
int num_nonterminals() const
Number of nonterminals.
Definition EST_SCFG.h:222
int num_terminals() const
Number of terminals.
Definition EST_SCFG.h:224
void find_terms_nonterms(EST_StrList &nt, EST_StrList &t, LISP rules)
Definition EST_SCFG.cc:91
EST_read_status load(const EST_String &filename)
Load grammar from named file.
Definition EST_SCFG.cc:193
LISP get_rules()
Return rules as LISP list.
Definition EST_SCFG.cc:169
void set_rule_prob_cache()
(re-)set rule probability caches
Definition EST_SCFG.cc:256
SCFGRuleList rules
The rules themselves.
Definition EST_SCFG.h:207
EST_write_status save(const EST_String &filename)
Save current grammar to named file.
Definition EST_SCFG.cc:204
EST_String terminal(int m) const
Convert terminal index to string form.
Definition EST_SCFG.h:216
void set_rules(LISP rules)
Set (or reset) rules from external source after construction.
Definition EST_SCFG.cc:120
void clear(void)
remove all items in list
Definition EST_TList.h:239
void append(const T &item)
add item onto end of list
Definition EST_TList.h:191