Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
scfg_parse_main.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996,1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : October 1997 */
35/*-----------------------------------------------------------------------*/
36/* Parse a list of sentences with a given stochastic context free */
37/* grammar */
38/* */
39/*=======================================================================*/
40#include <cstdlib>
41#include <cstdio>
42#include <iostream>
43#include <fstream>
44#include <cstring>
45#include "EST.h"
46#include "EST_SCFG.h"
47#include "EST_SCFG_Chart.h"
48#include "siod.h"
49
// Name of the output file.  Starts as "-" and is reassigned in
// scfg_parse_main() from the -o option (or back to "-" when -o is absent).
50static EST_String outfile = "-";
51
// Forward declaration: main() calls this before its definition appears.
52static int scfg_parse_main(int argc, char **argv);
53
54
55/** @name <command>scfg_parse</command> <emphasis>Parse text using a pre-trained stochastic context free grammar</emphasis>
56 @id scfg-parse-manual
57 * @toc
58 */
59
60//@{
61
62
63/**@name Synopsis
64 */
65//@{
66
67//@synopsis
68
69/**
70
71This parses given text with a given stochastic context free grammar.
72Note this program is not designed as an arbitrary parser for
73unrestricted English. It simply parses the input non-terminals
74with the given grammar. If you want English (or other language)
75parses, consider using the festival script <command>scfg_parse</command>
76which does proper tokenization and part of speech tagging, before
77passing it to a SCFG.
78
79 */
80
81//@}
82
83/**@name OPTIONS
84 */
85//@{
86
87//@options
88
89//@}
90
91int main(int argc, char **argv)
92{
93
94 scfg_parse_main(argc,argv);
95
96 exit(0);
97 return 0;
98}
99
100static int scfg_parse_main(int argc, char **argv)
101{
102 // Top level function generates a probabilistic grammar
103 EST_Option al;
104 EST_StrList files;
105 EST_SCFG_Chart chart;
106 LISP rules,s,parse;
107 FILE *corpus,*output;
108 int i;
109
110 parse_command_line
111 (argc, argv,
112 EST_String("[options]\n")+
113 "Summary: Parse a corpus with a stochastic context free grammar\n"+
114 "-grammar <ifile> Grammar file, one rule per line.\n"+
115 "-corpus <ifile> Corpus file, one bracketed sentence per line.\n"+
116 "-brackets Output bracketing only.\n"+
117 "-o <ofile> Output file for parsed sentences.\n",
118 files, al);
119
120 if (al.present("-o"))
121 outfile = al.val("-o");
122 else
123 outfile = "-";
124
125 siod_init();
126
127 if (al.present("-grammar"))
128 {
129 rules = vload(al.val("-grammar"),1);
130 gc_protect(&rules);
131 }
132 else
133 {
134 cerr << "scfg_parse: no grammar specified" << endl;
135 exit(1);
136 }
137
138 if (al.present("-corpus"))
139 {
140 if ((corpus = fopen(al.val("-corpus"),"r")) == NULL)
141 {
142 cerr << "scfg_parse: can't open corpus file \"" <<
143 al.val("-corpus") << "\" for reading " << endl;
144 exit(1);
145 }
146 }
147 else
148 {
149 cerr << "scfg_parse: no corpus specified" << endl;
150 exit(1);
151 }
152
153 if (al.present("-o"))
154 {
155 if ((output=fopen(al.val("-o"),"w")) == NULL)
156 {
157 cerr << "scfg_parse: can't open output file \"" <<
158 al.val("-o") << "\" for writing " << endl;
159 exit(1);
160 }
161 }
162 else
163 output = stdout;
164
165 gc_protect(&s);
166 gc_protect(&parse);
167 for (i=0; ((s=lreadf(corpus)) != get_eof_val()); i++)
168 {
169 parse = scfg_parse(s,rules);
170 if (al.present("-brackets"))
171 {
172 LISP bparse = scfg_bracketing_only(parse);
173 if (bparse == NIL)
174 bparse = s;
175 pprint_to_fd(output,bparse);
176 }
177 else
178 pprint_to_fd(output,parse);
179 if (i%100 == 99)
180 user_gc(NIL);
181 }
182
183 if (output != stdout)
184 fclose(output);
185 gc_unprotect(&s);
186 gc_unprotect(&parse);
187 gc_unprotect(&rules);
188
189 return 0;
190}
191
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition EST_TKVL.cc:222