Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
wfst_train_main.cc
1/*************************************************************************/
2/* */
3/* Language Technologies Institute */
4/* Carnegie Mellon University */
5/* Copyright (c) 1999 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : October 1999 */
35/*-----------------------------------------------------------------------*/
36/* A training method for splitting states in a WFST from data */
37/* */
38/*=======================================================================*/
39#include <cstdlib>
40#include <cstdio>
41#include <iostream>
42#include <fstream>
43#include <cstring>
44#include "EST.h"
45#include "EST_simplestats.h"
46#include "EST_WFST.h"
47
48LISP load_string_data(EST_WFST &wfst,EST_String &filename);
49void wfst_train(EST_WFST &wfst, LISP data);
50
51static int wfst_train_main(int argc, char **argv);
52
53/** @name <command>wfst_train</command> <emphasis>Train a weighted finite-state transducer</emphasis>
54 @id wfst-train-manual
55 * @toc
56 */
57
58//@{
59
60
61/**@name Synopsis
62 */
63//@{
64
65//@synopsis
66
67/**
68This takes an existing WFST and data and splits states in an
69entropy-reducing way to produce a new WFST that better models the given data.
70
71 */
72
73//@}
74
75/**@name OPTIONS
76 */
77//@{
78
79//@options
80
81//@}
82
83
84int main(int argc, char **argv)
85{
86
87 wfst_train_main(argc,argv);
88
89 exit(0);
90 return 0;
91}
92
93static int wfst_train_main(int argc, char **argv)
94{
95 // Train a WFST from data building new states
96 EST_Option al;
97 EST_StrList files;
98 EST_String wfstfile;
99 FILE *ofd;
100
101 parse_command_line
102 (argc, argv,
103 EST_String("[WFSTFILE] [input file0] ... [-o output file]\n")+
104 "Summary: Train a WFST on data\n"+
105 "-wfst <ifile> The WFST to start from\n"+
106 "-data <ifile> Sentences in the language recognised by WFST\n"+
107 "-o <ofile> Output file for trained WFST\n"+
108 "-heap <int> {210000}\n"+
109 " Set size of Lisp heap, needed for large rulesets\n",
110 files, al);
111
112 if (al.present("-o"))
113 {
114 if ((ofd=fopen(al.val("-o"),"w")) == NULL)
115 EST_error("can't open output file for writing \"%s\"",
116 (const char *)al.val("-o"));
117 }
118 else
119 ofd = stdout;
120
121 if (al.present("-wfst"))
122 wfstfile = al.val("-wfst");
123 else
124 EST_error("no WFST specified");
125
126 siod_init(al.ival("-heap"));
127 siod_est_init();
128
129 EST_WFST wfst;
130 LISP data;
131
132 if (wfst.load(wfstfile) != format_ok)
133 EST_error("failed to read WFST from \"%s\"",
134 (const char *)wfstfile);
135
136 data = load_string_data(wfst,al.val("-data"));
137
138 wfst_train(wfst,data);
139
140 if (wfst.save(al.val("-o")) != write_ok)
141 EST_error("failed to write trained WFST to \"%s\"",
142 (const char *)al.val("-o"));
143
144 return 0;
145
146}
147
int ival(const EST_String &rkey, int m=1) const
Definition EST_Option.cc:76
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition EST_TKVL.cc:222
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
Definition EST_WFST.cc:349
EST_read_status load(const EST_String &filename)
?
Definition EST_WFST.cc:508