Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
wfst_ops.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : November 1997 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* Basic WFST operations: minimization, determinization, intersection, */
38/* union, composition */
39/* */
40/*=======================================================================*/
41#include <iostream>
42#include <cstdlib>
43#include "EST_WFST.h"
44#include "wfst_aux.h"
45#include "EST_String.h"
46#include "EST_TList.h"
47#include "EST_TKVL.h"
48#include "EST_THash.h"
49
50Declare_TList_T(EST_WFST_MultiState *,EST_WFST_MultiStateP)
51
52 // Declare_KVL(int, EST_IList)
53
54 typedef EST_TKVI<int, EST_IList> KVI_int_EST_IList_t;
56
57 static EST_IList int_EST_IList_kv_def_EST_IList_s;
58 static int int_EST_IList_kv_def_int_s;
59
60 template <> EST_IList *EST_TKVL< int, EST_IList >::default_val=&int_EST_IList_kv_def_EST_IList_s;
61 template <> int *EST_TKVL< int, EST_IList >::default_key=&int_EST_IList_kv_def_int_s;
62
63 Declare_TList_N(KVI_int_EST_IList_t, 0)
64
65
66#if defined(INSTANTIATE_TEMPLATES)
67#include "../base_class/EST_TList.cc"
68
69 Instantiate_TList_T_MIN(EST_WFST_MultiState *,EST_WFST_MultiStateP)
70
71#include "../base_class/EST_TKVL.cc"
72
73 // Instantiate_KVL(int, EST_IList)
74
75 template class EST_TKVL<int, EST_IList>;
76 template class EST_TKVI<int, EST_IList>;
77// ostream &operator<<(ostream &s, EST_TKVI< int , EST_IList > const &i){ return s << i.k << "\t" << i.v << "\n"; }
78// ostream& operator << (ostream& s, EST_TKVL< int , EST_IList > const &l) {EST_Litem *p; for (p = l.list.head(); p ; p = p->next()) s << l.list(p).k << "\t" << l.list(p).v << endl; return s;}
79 Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer_k, int, KVL_int_EST_IList_kitt)
80 Instantiate_TStructIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
81 Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
82
83 // Instantiate_TList(KVI_int_EST_IList_t)
84
87 template const char *error_name(EST_TList< KVI_int_EST_IList_t > val);
88
89 Instantiate_TIterator_T( EST_TList<KVI_int_EST_IList_t>, EST_TList<KVI_int_EST_IList_t>::IPointer, KVI_int_EST_IList_t, TList_KVI_int_EST_IList_t_itt);
90
91
92#endif
93
95
96static enum wfst_state_type intersect_state_type(wfst_list &wl,
98static int check_distinguished(const EST_WFST &nmwfst,
99 int p, int q,
100 wfst_marks &marks,
101 wfst_assumes &assumptions);
102
103void EST_WFST_MultiState::add(int i)
104{
105 // If of set type only add it if its not already there
106 EST_Litem *p;
107
108 if (p_type == wfst_ms_set)
109 for (p=head(); p != 0; p=p->next())
110 {
111 if ((*this)(p) == i)
112 return;
113 else if (i < (*this)(p)) // keep the list ordered
114 {
115 insert_before(p,i);
116 return;
117 }
118 }
119
120 append(i);
121}
122
123int multistate_index(EST_WFST_MultiStateIndex &index,
124 EST_WFST_MultiState *ms,int proposed)
125{
126 // Returns proposed if ms is not already in index, otherwise
127 // returns the value that was proposed when it was first new.
128
129 // I'll have to make this more efficient in future.
130 EST_String istring("");
131 EST_Litem *p;
132 int ns,found;
133
134 for (p=ms->head(); p != 0; p = p->next())
135 istring += itoString((*ms)(p)) + " ";
136
137 ns = index.val(istring,found);
138 if (found)
139 return ns;
140 else
141 {
142 index.add_item(istring,proposed);
143 return proposed;
144 }
145}
146
147static int pair_check(EST_THash<int,int> &pairs_done, int i, int o, int odim)
148{
149 int p;
150 int found;
151
152 p = (i*odim)+o; // unique number representing i/o pair
153
154 pairs_done.val(p,found);
155 if (!found)
156 { // first time seeing this pair
157 pairs_done.add_item(p,1);
158 return 0;
159 }
160 return 1;
161
162}
163
165{
166 // Determinise a non-deterministic WFST
167 EST_WFST_MultiState *start_state,*nms,*current;
168 int ns;
169 Agenda multistate_agenda;
170 EST_WFST_MultiStateIndex index(100);
171 int i,o, new_name;
172 int c=0;
173 EST_Litem *sp, *tp;
174
175 clear();
176 p_in_symbols.copy(ndwfst.p_in_symbols);
177 p_out_symbols.copy(ndwfst.p_out_symbols);
178
179 // Create a starting state and add it to this WFST
180 start_state = new EST_WFST_MultiState(wfst_ms_set);
181 start_state->add(ndwfst.start_state());
182 ndwfst.add_epsilon_reachable(start_state);
183
184 p_start_state = add_state(ndwfst.ms_type(start_state));
185 start_state->set_name(p_start_state);
186
187 multistate_agenda.append(start_state); // initialize agenda
188
189 while (multistate_agenda.length() > 0)
190 {
191 EST_THash<int,int> pairs_done(100);
192 current = multistate_agenda.first();
193 multistate_agenda.remove(multistate_agenda.head());
194 if ((c % 100) == 99)
195 cout << "Determinizing " << summary() << " Agenda "
196 << multistate_agenda.length() << endl;
197 c++;
198
199 for (sp=current->head(); sp != 0; sp=sp->next())
200 {
201 const EST_WFST_State *s = ndwfst.state((*current)(sp));
202 for (tp=s->transitions.head(); tp != 0; tp = tp->next())
203 {
204 i = s->transitions(tp)->in_symbol();
205 o = s->transitions(tp)->out_symbol();
206 // Need to check if i/o has already been proposed
207 if (pair_check(pairs_done,i,o,p_out_symbols.length()) == 1)
208 continue; // already prosed those
209// for (i=0; i < p_in_symbols.length(); i++)
210// { // start at 2 to skip any and epsilon characters -- hmm bad
211// for (o=0; o < p_out_symbols.length(); o++)
212// {
213 if ((i==o) && (i==0))
214 continue; // don't deal here with epsilon transitions
215 nms = apply_multistate(ndwfst,current,i,o);
216 if ((nms->length() == 0) ||
217 (ndwfst.ms_type(nms) == wfst_error))
218 {
219 delete nms;
220 continue; // no state to go to
221 }
222 new_name = multistate_index(index,nms,p_num_states);
223 if (new_name == p_num_states) // genuinely new
224 { // create a real state and add it to the agenda
225 ns = add_state(ndwfst.ms_type(nms));
226 nms->set_name(ns);
227 multistate_agenda.append(nms);
228 }
229 else
230 {
231 nms->set_name(new_name);
232 delete nms;
233 }
234
235 // Add new transition to current state
236 p_states[current->name()]
237 ->add_transition(nms->weight(),
238 nms->name(),
239 i,o);
240
241 }
242 }
243 delete current;
244
245 // Probably want some progress summary
246 }
247
248}
249
252 int in, int out) const
253{
254 // Apply in and out to ms and find all new states it becomes
255 EST_Litem *p;
256 EST_WFST_MultiState *new_ms = new EST_WFST_MultiState(wfst_ms_set);
257
258 new_ms->clear();
259
260 for (p=ms->head(); p != 0; p=p->next())
261 // Add all new possible states from ms(p) given in/out
262 wfst.transition_all((*ms)(p),in,out,new_ms);
263
264 // Add epsilon reachable states from any states in multistates
265 wfst.add_epsilon_reachable(new_ms);
266
267 return new_ms;
268
269}
270
271enum wfst_state_type EST_WFST::ms_type(EST_WFST_MultiState *ms) const
272{
273 // Returns wfst_error if ms contains an error state, wfst_final
274 // if there is at least one final and wfst_non_final
275 EST_Litem *p;
276 enum wfst_state_type r = wfst_nonfinal;
277
278 for (p=ms->head(); p != 0; p = p->next())
279 if (p_states((*ms)(p))->type() == wfst_error)
280 return wfst_error;
281 else if (p_states((*ms)(p))->type() == wfst_licence)
282 // wfst_licence states are generated in KK compilation
283 r = wfst_licence;
284 else if ((p_states((*ms)(p))->type() == wfst_final) &&
285 (r != wfst_licence))
286 r = wfst_final;
287
288 if (r == wfst_licence)
289 return wfst_nonfinal;
290 else
291 return r;
292}
293
295 int in,
296 int out,
297 EST_WFST_MultiState *ms) const
298{
299 // Find all possible new states from state given in/out
300 const EST_WFST_State *s = p_states(state);
301 EST_Litem *i;
302
303 for (i=s->transitions.head(); i != 0; i=i->next())
304 {
305 if ((in == s->transitions(i)->in_symbol()) &&
306 (out == s->transitions(i)->out_symbol()))
307 ms->add(s->transitions(i)->state());
308 }
309}
310
311static int is_a_member(const EST_IList &ii, int i)
312{
313 for (EST_Litem *p=ii.head(); p != 0; p=p->next())
314 if (ii(p) == i)
315 return TRUE;
316 return FALSE;
317}
318
320{
321 // As ms->add() adds in order we need to copy to a new list and append
322 // to it any new epsilon accessible states
323 EST_Litem *p;
324 EST_IList ii;
325 int ie = p_in_symbols.name(get_c_string(epsilon_label()));
326 int oe = p_out_symbols.name(get_c_string(epsilon_label()));
327
328 for (p=ms->head(); p != 0; p=p->next())
329 ii.append((*ms)(p));
330
331 for (p=ii.head(); p != 0; p=p->next())
332 {
333 const EST_WFST_State *s = p_states(ii(p));
334 EST_Litem *i;
335
336 for (i=s->transitions.head(); i != 0; i=i->next())
337 {
338 if ((ie == s->transitions(i)->in_symbol()) &&
339 (oe == s->transitions(i)->out_symbol()))
340 {
341 // Add to end of ii if not already there
342 int nstate = s->transitions(i)->state();
343 if (!is_a_member(ii,nstate))
344 {
345 ii.append(nstate);
346 ms->add(nstate); // gets added in order
347 }
348 }
349 }
350 }
351}
352
353/************************************************************************/
354/* Intersection of a list of transducers, i.e. what is accepted by all */
355/************************************************************************/
357{
358 // This is very similar to determinisation, similar complexity too
359 EST_WFST_MultiState *start_state = new EST_WFST_MultiState(wfst_ms_list);
360 EST_WFST_MultiState *nms,*current;
361 int ns;
362 Agenda multistate_agenda;
363 EST_WFST_MultiStateIndex index(100);
364 int i,o, new_name, n;
365 EST_Litem *p,*q;
366 int c=0;
367
368 // Initialize this WFST from the given ones
369 clear();
370 p_in_symbols.copy(wl.first().p_in_symbols);
371 p_out_symbols.copy(wl.first().p_out_symbols);
372
373 // Determinize each input WFST and make a new start state consisting of
374 // the start states of each of the input WFSTs
375 for (p=wl.tail(); p != 0; p=p->prev())
376 {
377 if (!wl(p).deterministic())
378 {
379 cout << "...intersection deterministing" << endl;
380 EST_WFST tt = wl(p);
381 wl(p).determinize(tt);
382 }
383 start_state->add(wl(p).start_state());
384 }
385
386 p_start_state = add_state(intersect_state_type(wl,start_state));
387 // Label multistate start start with single-state name
388 start_state->set_name(p_start_state);
389
390 multistate_agenda.append(start_state); // initialize agenda
391
392 while (multistate_agenda.length() > 0)
393 {
394 current = multistate_agenda.first();
395 multistate_agenda.remove(multistate_agenda.head());
396 if ((c % 100) == 99)
397 cout << "Intersection " << summary() << " Agenda "
398 << multistate_agenda.length() << endl;
399 c++;
400
401 // For all possible in/out pairs
402 for (i=0; i < p_in_symbols.length(); i++)
403 { // start at 2 to skip any and epsilon characters -- hmm bad
404 for (o=0; o < p_out_symbols.length(); o++)
405 {
406 if ((i==o) && (i==0)) // shouldn't be epsilon/epsilon here
407 continue;
408 nms = new EST_WFST_MultiState(wfst_ms_list);
409 // Increment multistate to new multistate for each individual
410 // state using each WFST
411 for (n=0,p=wl.head(),q=current->head();
412 (p != 0) && (q != 0);
413 p=p->next(),q=q->next(),n++)
414 nms->add(wl(p).transition((*current)(q),i,o));
415 if (intersect_state_type(wl,nms) == wfst_error)
416 {
417 delete nms;
418 continue; // no state to go to
419 }
420 new_name = multistate_index(index,nms,p_num_states);
421 if (new_name == p_num_states) // genuinely new and unseen
422 { // create a real state and add it to the agenda
423 ns = add_state(intersect_state_type(wl,nms));
424 nms->set_name(ns);
425 multistate_agenda.append(nms);
426 }
427 else // already seen this state, and is already named
428 {
429 nms->set_name(new_name);
430 delete nms;
431 }
432
433 // Add new transition to current state
434 p_states[current->name()]
435 ->add_transition(nms->weight(),nms->name(),i,o);
436 }
437 }
438 delete current;
439 // Probably want some progress summary
440 }
441
442}
443
444static enum wfst_state_type intersect_state_type(wfst_list &wl,
446{
447 // Find the state type of the combined states
448 // If any are error, return error, if one is nonfinal return nonfinal
449 // otherwise its final
450 EST_Litem *p,*q;
451 enum wfst_state_type r = wfst_final;
452
453 for (p=wl.head(),q=ms->head();
454 (p != 0) && (q != 0);
455 p=p->next(),q=q->next())
456 {
457 if ((*ms)(q) == WFST_ERROR_STATE)
458 return wfst_error;
459
460 enum wfst_state_type dd = wl(p).state((*ms)(q))->type();
461
462 if (dd == wfst_error)
463 return wfst_error;
464 else if (dd == wfst_nonfinal)
465 r = wfst_nonfinal;
466 }
467 return r;
468}
469
471{
472 // Intersect two WFSTs
473 wfst_list wl;
474
475 wl.append(a);
476 wl.append(b);
477
478 intersection(wl);
479}
480
481/*******************************************************************/
482/* Minimization is of complexity of O(n^2) */
483/*******************************************************************/
484void EST_WFST::minimize(const EST_WFST &nmwfst)
485{
486 // Minimize a WFST
487 int p,q;
488 wfst_marks marks(nmwfst.num_states());
489 wfst_assumes assumptions;
490
491 // For each combination of states
492 for (p=0; p < nmwfst.num_states()-1; p++)
493 for (q=p+1; q < nmwfst.num_states(); q++)
494 check_distinguished(nmwfst,p,q,marks,assumptions);
495
496 // The marked array now has all different and lists to equivalent states
497 // Build an array mapping old name to new name.
498 int num_new_states;
499 int i;
500 EST_IVector state_map;
501
502 marks.find_state_map(state_map,num_new_states);
503
504 // Build the new minimized WFST mapping existing transitions
505 clear();
506 p_in_symbols.copy(nmwfst.p_in_symbols);
507 p_out_symbols.copy(nmwfst.p_out_symbols);
508
509 init(num_new_states);
510 p_start_state = state_map(nmwfst.p_start_state);
511
512 for (i=0; i < nmwfst.num_states(); i++)
513 {
514 if (p_states[state_map(i)] == 0)
515 p_states[state_map(i)] =
516 copy_and_map_states(state_map,nmwfst.state(i),nmwfst);
517 }
518
519}
520
521static int check_distinguished(const EST_WFST &nmwfst,
522 int p, int q,
523 wfst_marks &marks,
524 wfst_assumes &assumptions)
525{
526 // Check to see if these two are equivalent
527 EST_Litem *t;
528 EST_IList from_p,from_q;
529
530 if (marks.distinguished(p,q)) // been here, done that
531 return TRUE;
532 else if (marks.undistinguished(p,q)) // been here too
533 return FALSE;
534 // Not been here yet so do some work to try to find out if
535 // these states can be distinguished
536 else if ((nmwfst.state(p)->type() != nmwfst.state(q)->type()) ||
537 (nmwfst.state(p)->num_transitions() !=
538 nmwfst.state(q)->num_transitions()))
539 { // Different final/non-final type or different number
540 // of transitions so obviously different states
541 marks.distinguish(p,q);
542 return TRUE;
543 }
544 else
545 { // Have to check their transitions individually
546 for (t=nmwfst.state(p)->transitions.head(); t != 0; t=t->next())
547 {
548 int in = nmwfst.state(p)->transitions(t)->in_symbol();
549 int out = nmwfst.state(p)->transitions(t)->out_symbol();
550 int y = nmwfst.state(p)->transitions(t)->state();
551 int z = nmwfst.transition(q,in,out);
552 if ((z == WFST_ERROR_STATE) ||
553 (marks.distinguished(y,z)))
554 { // no equiv transition or obviously different states
555 marks.distinguish(p,q);
556 return TRUE;
557 }
558 else if (equivalent_to(y,z,assumptions))
559 continue;
560 else // Potential equivalence, record y,z for later
561 {
562 from_p.append(y);
563 from_q.append(z);
564 }
565 }
566 // All transitions had potential match so only now
567 // actually check their follow sets
568 EST_Litem *yp, *zp;
569 int tl = FALSE;
570 if (assumptions.length() == 0)
571 tl = TRUE;
572 // assume they are undistinguished
573 add_assumption(p,q,assumptions);
574 for (yp=from_p.head(),zp=from_q.head();
575 yp != 0; yp=yp->next(),zp=zp->next())
576 if (check_distinguished(nmwfst,from_p(yp),from_q(zp),
577 marks,
578 assumptions))
579 {
580 marks.distinguish(p,q); // set the distinguished
581 assumptions.clear();
582 return TRUE;
583 }
584 // ok I give up, they are the same
585 if (tl)
586 { // This is equivalent given the assumptions (and no
587 // higher level assumptions) so we can mark these states
588 // as undistinguished (and all the assumptions)
589 mark_undistinguished(marks,assumptions);
590 assumptions.clear();
591 }
592 return FALSE;
593 }
594}
595
596void EST_WFST::extend_alphabets(const EST_WFST &b)
597{
598 // Extend current in/out alphabets to accommodate anything in b's
599 // that are not in a's
600 // This guarantees that the number in this will still be valid
601 EST_StrList ivocab, ovocab;
602 int i;
603
604 for (i=0; i<p_in_symbols.length(); i++)
605 ivocab.append(in_symbol(i));
606 for (i=0; i<b.p_in_symbols.length(); i++)
607 if (!strlist_member(ivocab,b.in_symbol(i)))
608 ivocab.append(b.in_symbol(i));
609 for (i=0; i<p_out_symbols.length(); i++)
610 ovocab.append(out_symbol(i));
611 for (i=0; i<b.p_out_symbols.length(); i++)
612 if (!strlist_member(ovocab,b.out_symbol(i)))
613 ovocab.append(b.out_symbol(i));
614
615 p_in_symbols.init(ivocab);
616 p_out_symbols.init(ovocab);
617}
618
619EST_WFST_State *EST_WFST::copy_and_map_states(const EST_IVector &state_map,
620 const EST_WFST_State *s,
621 const EST_WFST &b) const
622{
623 // Copy s into new state mapping new states to those in state map
624 EST_WFST_State *ns = new EST_WFST_State(state_map(s->name()));
625 EST_Litem *i;
626
627 ns->set_type(s->type());
628
629 for (i=s->transitions.head(); i != 0; i=i->next())
630 {
631 int mapped_state= state_map(s->transitions(i)->state());
632 if (mapped_state != WFST_ERROR_STATE)
633 ns->add_transition(s->transitions(i)->weight(),
634 mapped_state,
635 in_symbol(b.in_symbol(s->transitions(i)->in_symbol())),
636 out_symbol(b.out_symbol(s->transitions(i)->out_symbol())));
637 }
638
639 return ns;
640}
641
642/***********************************************************************/
643/* Build a WFST which doesn't accept any of the strings that a accepts */
644/* This keeps the same in/out alphabet */
645/***********************************************************************/
647{
648 int i;
649
650 copy(a);
651
652 for (i=0; i < num_states(); i++)
653 {
654 if (p_states[i]->type() == wfst_final)
655 p_states[i]->set_type(wfst_nonfinal);
656 else if (p_states[i]->type() == wfst_nonfinal)
657 p_states[i]->set_type(wfst_final);
658 // errors remain errors
659 }
660}
661
662static int noloopstostart(const EST_WFST &a)
663{
664 // TRUE if there are no transitions leading to the start state
665 // when this is true there is a union operation which preserves
666 // deterministicness
667 int i;
668
669 for (i=0; i < a.num_states(); i++)
670 {
671 const EST_WFST_State *s = a.state(i);
672 EST_Litem *p;
673 for (p=s->transitions.head(); p != 0; p=p->next())
674 {
675 if (s->transitions(p)->state() == a.start_state())
676 return FALSE;
677 }
678 }
679 return TRUE;
680}
681
682int EST_WFST::deterministiconstartstates(const EST_WFST &a, const EST_WFST &b) const
683{
684 // TRUE if there are no common transition labels from a and b's
685 // start state
686 EST_IMatrix tab;
687 int in,out;
688
689 tab.resize(a.p_in_symbols.length(),a.p_out_symbols.length());
690 tab.fill(0);
691
692 for (EST_Litem *t=a.state(a.start_state())->transitions.head();
693 t != 0; t=t->next())
694 {
695 tab(a.state(a.start_state())->transitions(t)->in_symbol(),
696 a.state(a.start_state())->transitions(t)->out_symbol()) = 1;
697 }
698
699 for (EST_Litem *tt=b.state(b.start_state())->transitions.head();
700 tt != 0; tt=tt->next())
701 {
702 in = a.in_symbol(b.in_symbol(b.state(b.start_state())->transitions(tt)->in_symbol()));
703 out = a.out_symbol(b.out_symbol(b.state(b.start_state())->transitions(tt)->out_symbol()));
704 if (in == -1)
705 continue; // obviously not a clash
706 else if (out == -1)
707 continue; // obviously not a clash
708 else if (tab(in,out) == 1)
709 return FALSE;
710 }
711 return TRUE;
712}
713
714/***********************************************************************/
715/* Build a WFST which accepts both strings of a and of b */
716/***********************************************************************/
717void EST_WFST::uunion(const EST_WFST &a,const EST_WFST &b)
718{
719 EST_IVector smap;
720 int i;
721
722 copy(a);
723 extend_alphabets(b);
724
725 if (a.deterministic() && b.deterministic() &&
726 noloopstostart(a) && noloopstostart(b) &&
727 deterministiconstartstates(a,b))
728 {
729 // This does the union without the epsilon and will preserve
730 // deterministic wfsts in this special case
731 smap.resize(b.num_states());
732 smap[0] = start_state();
733 for (i=1; i < b.num_states(); ++i)
734 smap[i] = i+a.num_states()-1;
735
736 more_states(a.num_states()+b.num_states()-1);
737 p_num_states += b.num_states()-1;
738 for (i=1; i < b.num_states(); i++)
739 p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
740
741 const EST_WFST_State *s = b.state(b.start_state());
742 EST_Litem *p;
743 for (p=s->transitions.head(); p != 0; p=p->next())
744 {
745 int mapped_state= smap(s->transitions(p)->state());
746 if (mapped_state != WFST_ERROR_STATE)
747 p_states[start_state()]->
748 add_transition(s->transitions(p)->weight(),
749 mapped_state,
750 in_symbol(b.in_symbol(s->transitions(p)->in_symbol())),
751 out_symbol(b.out_symbol(s->transitions(p)->out_symbol())));
752 }
753 }
754 else
755 { // do it the hard way
756 smap.resize(b.num_states());
757 for (i=0; i < b.num_states(); ++i)
758 smap[i] = i+a.num_states();
759
760 more_states(a.num_states()+b.num_states());
761 p_num_states += b.num_states();
762 for (i=0; i < b.num_states(); i++)
763 p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
764
765 // Actually do the union bit by adding an epsilon transition from
766 // the start of a to start state of b
767 p_states[start_state()]->add_transition(0.0,smap[b.start_state()],
769 }
770
771}
772
773/***********************************************************************/
774/* Build a WFST which a followed by b */
775/***********************************************************************/
776void EST_WFST::concat(const EST_WFST &a,const EST_WFST &b)
777{
778 EST_IVector smap;
779 int i;
780
781 copy(a);
782 extend_alphabets(b);
783
784 smap.resize(b.num_states());
785 for (i=0; i < b.num_states(); ++i)
786 smap[i] = i+a.num_states();
787
788 more_states(a.num_states()+b.num_states());
789
790 // everything final in a becomes non final and an epsilon transition
791 // goes from them to the start of b
792 for (i=0; i < num_states(); i++)
793 {
794 if (p_states[i]->type() == wfst_final)
795 {
796 p_states[i]->set_type(wfst_nonfinal);
797 p_states[i]->add_transition(0.0,smap[b.start_state()],
799 }
800 }
801
802 p_num_states += b.num_states();
803 for (i=0; i < b.num_states(); i++)
804 p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
805
806}
807
808/***********************************************************************/
809/* Build a WFST from composing a and b (feeding output of a to */
810/* input of b) */
811/***********************************************************************/
812void EST_WFST::compose(const EST_WFST &a,const EST_WFST &b)
813{
814 EST_WFST_MultiState *start_state = new EST_WFST_MultiState(wfst_ms_list);
815 EST_WFST_MultiState *nms,*current;
816 Agenda multistate_agenda;
817 EST_WFST_MultiStateIndex index(100);
818 wfst_list wl;
819 EST_WFST t;
820 int i,new_name;
821 EST_Litem *p,*q;
822
823 clear();
824 p_in_symbols.copy(a.p_in_symbols);
825 p_out_symbols.copy(b.p_out_symbols);
826
827 // Unfortunately need to needlessly copy a and b here
828 wl.append(a);
829 start_state->add(a.start_state());
830 wl.append(b);
831 start_state->add(b.start_state());
832
833 p_start_state = add_state(intersect_state_type(wl,start_state));
834 // Label multistate start start with single-state name
835 start_state->set_name(p_start_state);
836
837 multistate_agenda.append(start_state); // initialize agenda
838
839 while (multistate_agenda.length() > 0)
840 {
841 current = multistate_agenda.first();
842 multistate_agenda.remove(multistate_agenda.head());
843
844 // For all possible in/out pairs
845 for (i=0; i < p_in_symbols.length(); i++)
846 { // start at 2 to skip any and epsilon characters -- hmm bad
847 // find transitions
848 wfst_translist transa;
849
850 wl.first().transduce(current->first(),i,transa);
851 for (p=transa.head(); p != 0; p=p->next())
852 {
853 wfst_translist transb;
854 // feed a's out to b'is in
855 wl.last().transduce(
856 current->last(),
857 b.in_symbol(a.out_symbol(transa(p)->out_symbol())),
858 transb);
859 for (q = transb.head(); q != 0; q=q->next())
860 {
861 nms = new EST_WFST_MultiState(wfst_ms_list);
862 nms->add(transa(p)->state());
863 nms->add(transb(q)->state());
864
865 if (intersect_state_type(wl,nms) == wfst_error)
866 {
867 delete nms;
868 continue; // no state to go to
869 }
870 new_name = multistate_index(index,nms,p_num_states);
871 if (new_name == p_num_states) // genuinely new and unseen
872 { // create a real state and add it to the agenda
873 int ns = add_state(intersect_state_type(wl,nms));
874 nms->set_name(ns);
875 multistate_agenda.append(nms);
876 }
877 else // already seen this state, and is already named
878 nms->set_name(new_name);
879
880 // Add new transition to current state
881 p_states[current->name()]
882 ->add_transition(nms->weight(),nms->name(),
883 i,transb(q)->out_symbol());
884
885 }
886
887 }
888 }
889 delete current;
890 // Probably want some progress summary
891 }
892
893}
894
895/***********************************************************************/
896/* Build a WFST which accepts strings in a but not in b */
897/***********************************************************************/
899{
900 EST_WFST compb;
901
902 // This is sort of a complement, but not quite
903 // But what would the name of this operation be?
904 compb.copy(b);
905 for (int i=0; i < compb.num_states(); i++)
906 {
907 if (compb.p_states[i]->type() == wfst_final)
908 compb.p_states[i]->set_type(wfst_error);
909 }
910
911 uunion(a,compb);
912}
913
914/***********************************************************************/
915/* Remove states from which a final state can't be reached */
916/***********************************************************************/
918{
919 // Find all states which are reachable from the start state, and
920 // can reach a final state. Mark all others as error states
921 wfst_list wl;
922
923 wl.append(a);
924 EST_WFST &ab = wl.first();
925
926 ab.current_tag = ++traverse_tag;
927 for (int i=0; i < ab.p_num_states; i++)
928 ab.can_reach_final(i);
929 // This will copy only the non-error states
930 intersection(wl);
931
932}
933
934int EST_WFST::can_reach_final(int state)
935{
936 // Return TRUE iff this state is final or can reach a final state
937
938 if (p_states[state]->type() == wfst_final)
939 return TRUE;
940 else if (p_states[state]->type() == wfst_error)
941 return FALSE;
942 else if (p_states[state]->tag() == current_tag)
943 // Been here and it is reachable
944 return TRUE;
945 else
946 {
947 EST_Litem *i;
948 enum wfst_state_type current_val = p_states[state]->type();
949 enum wfst_state_type r = wfst_error;
950 // temporarily set this to error to stop infinite recursion
951 p_states[state]->set_type(wfst_error);
952
953 for (i=p_states[state]->transitions.head(); i != 0; i=i->next())
954 // if any transition goes to something that reaches a final state
955 // set the value back to its original
956 if (can_reach_final(p_states[state]->transitions(i)->state()))
957 r = current_val;
958
959 // Will be set back to original value iff a final state
960 // is reachable from here
961 p_states[state]->set_type(r);
962 if (r == wfst_error)
963 return FALSE;
964 else
965 {
966 p_states[state]->set_tag(current_tag);
967 return TRUE;
968 }
969 }
970}
971
972/***********************************************************************/
973/* True is wfst is deterministic */
974/***********************************************************************/
976{
977 // True if all states contains no multiple arcs with the same symbol
978 EST_IMatrix tab;
979
980 tab.resize(p_in_symbols.length(),p_out_symbols.length());
981
982 for (int i=0; i < p_num_states; i++)
983 {
984 tab.fill(0);
985 for (EST_Litem *t=state(i)->transitions.head(); t != 0; t=t->next())
986 {
987 if (tab(state(i)->transitions(t)->in_symbol(),
988 state(i)->transitions(t)->out_symbol()) == 1)
989 return FALSE;
990 else
991 tab(state(i)->transitions(t)->in_symbol(),
992 state(i)->transitions(t)->out_symbol()) = 1;
993 }
994 }
995 return TRUE;
996}
bool init(const EST_StrList &vocab)
(re-)initialise
const EST_String & name(const int n) const
The name given the index.
const int length(void) const
The number of members in the discrete.
int length(void) const
Length of string ({not} length of underlying chunk)
Definition EST_String.h:241
V & val(const K &key, int &found) const
Definition EST_THash.cc:114
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition EST_THash.cc:167
const int length() const
number of key value pairs in list
Definition EST_TKVL.h:96
void clear()
Empty list.
Definition EST_TKVL.cc:50
const T & last() const
return const reference to last item in list
Definition EST_TList.h:149
void clear(void)
remove all items in list
Definition EST_TList.h:239
void append(const int &item)
Definition EST_TList.h:191
EST_Litem * insert_before(EST_Litem *ptr, const int &item)
Definition EST_TList.h:206
const T & first() const
return const reference to first item in list
Definition EST_TList.h:146
void fill(const T &v)
fill matrix with value v
void resize(int rows, int cols, int set=1)
resize matrix
void resize(int n, int set=1)
resize vector
void difference(const EST_WFST &a, const EST_WFST &b)
Definition wfst_ops.cc:898
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
Definition EST_WFST.cc:652
void uunion(EST_TList< EST_WFST > &wl)
void add_epsilon_reachable(EST_WFST_MultiState *ms) const
Extend multi-state with epsilon reachable states.
Definition wfst_ops.cc:319
void complement(const EST_WFST &a)
Build complement of a.
Definition wfst_ops.cc:646
void init(int init_num_states=10)
Clear with (estimation of number of states required)
Definition EST_WFST.cc:145
void clear()
clear removing existing states if any
Definition EST_WFST.cc:115
void transition_all(int state, int in, int out, EST_WFST_MultiState *ms) const
Find all possible transitions for given state/input/output.
Definition wfst_ops.cc:294
void compose(const EST_WFST &a, const EST_WFST &b)
Definition wfst_ops.cc:812
EST_WFST_MultiState * apply_multistate(const EST_WFST &wfst, EST_WFST_MultiState *ms, int in, int out) const
Transduce a multi-state given n and out.
Definition wfst_ops.cc:250
int out_epsilon() const
Internal index for output epsilon.
Definition EST_WFST.h:220
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
Definition EST_WFST.h:204
void remove_error_states(const EST_WFST &a)
Remove error states from the WFST.
Definition wfst_ops.cc:917
int in_epsilon() const
Internal index for input epsilon.
Definition EST_WFST.h:218
int deterministic() const
True if WFST is deterministic.
Definition wfst_ops.cc:975
void copy(const EST_WFST &wfst)
Copy from existing wfst.
Definition EST_WFST.cc:132
const EST_WFST_State * state(int i) const
Return internal state information.
Definition EST_WFST.h:222
int out_symbol(const EST_String &s) const
Map output symbol to output alphabet index.
Definition EST_WFST.h:210
LISP epsilon_label() const
LISP for on epsilon symbols.
Definition EST_WFST.h:216
void minimize(const EST_WFST &a)
Build minimized form of a.
Definition wfst_ops.cc:484
enum wfst_state_type ms_type(EST_WFST_MultiState *ms) const
Given a multi-state return type (final, ok, error)
Definition wfst_ops.cc:271
void concat(const EST_WFST &a, const EST_WFST &b)
Definition wfst_ops.cc:776
void intersection(EST_TList< EST_WFST > &wl)
Definition wfst_ops.cc:356
void determinize(const EST_WFST &a)
Build determinized form of a.
Definition wfst_ops.cc:164
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.
Definition EST_WFST.cc:260