45#include "rxp/XML_Parser.h"
// Pattern for an "id"/"href" attribute value that names a single word ID.
// The matching code later in this file reads subexpression 1 of a successful
// match, so the escaped \\( ... \\) pair is presumably the capture group
// (the digits) and the bare parentheses are literal characters, i.e. text
// shaped like "...#id(w<digits>)" -- TODO confirm against EST_Regex syntax.
47static EST_Regex simpleIDRegex(
".*#id(w\\([0-9]+\\))");
// Pattern for an "id"/"href" attribute value that names a range of word IDs.
// It is tried after simpleIDRegex fails; on a match, subexpressions 1 and 2
// are converted with atoi() and used as the inclusive lower and upper bounds
// of a loop that formats one "w<number>" ID per step.
// Presumably the escaped \\( ... \\) pairs are the two capture groups and the
// bare parentheses are literal, i.e. "...#id(w<first>)...id(w<last>)" --
// TODO confirm against EST_Regex syntax.
48static EST_Regex rangeIDRegex(
".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
62 Parse_State() : contents(100) {}
102 const char *instruction);
112 for(them.
begin(attributes); them ; them++)
114 (
const char *)them->k,
115 (
const char *)them->v);
118EST_read_status solexml_read(FILE *file,
124 (void)print_attributes;
125 Sole_Parser_Class pclass;
136 return read_format_error;
145static
void ensure_relation(Parse_State *state)
147 if (state->rel==NULL)
149 state->rel = state->utt->create_relation(state->relName);
172 val = attributes.
val(
"id");
175 else if (attributes.
present(
"href"))
177 val = attributes.
val(
"href");
178 int starts[EST_Regex_max_subexpressions];
179 int ends[EST_Regex_max_subexpressions];
181 if (val.matches(simpleIDRegex, 0, starts, ends))
183 EST_String n = val.at(starts[1], ends[1]-starts[1]);
187 else if (val.matches(rangeIDRegex, 0, starts, ends))
189 int n1 = atoi(val.at(starts[1], ends[1]-starts[1]));
190 int n2 = atoi(val.at(starts[2], ends[2]-starts[2]));
192 for(
int i=n1; i<=n2; i++)
195 sprintf(buf,
"w%d", i);
202 EST_warning(
"element with bad ID or HREF '%s'", (
const char *)val);
207 sprintf(buf,
"n%d", ++count);
224 Parse_State *state = (Parse_State *)data;
236 (void)c; (void)p; (void)data;
246 (void)c; (void)p; (void)attributes;
247 Parse_State *state = (Parse_State *)data;
251 if (strcmp(name,
"solexml")==0)
253 state->relName=attributes.
val(
"relation");
254 printf(
"start solexml relation=%s\n", (
const char *)state->relName);
257 else if (strcmp(name,
"text-elem")==0)
263 ensure_relation(state);
265 if (strcmp(name,
"anaphora-elem")==0
266 || strcmp(name,
"wordlist")==0
267 || strcmp(name,
"w")==0)
270 extract_ids(attributes, ids);
274 for(; idp!= NULL; idp = idp->next())
284 state->current = state->parent;
285 state->parent=iup(state->parent);
294 for(them.
begin(attributes); them ; them++)
303 if (state->current == NULL)
304 if (state->parent == NULL)
305 item = state->rel->append();
307 item = state->parent->insert_below();
309 item = state->current->insert_after();
311 item->set_contents(cont);
318 EST_warning(
"SOLE XML Parser: unknown element %s", name);
328 (void)c; (void)p; (void)attributes;
329 Parse_State *state = (Parse_State *)data;
331 if (strcmp(name,
"language")==0)
333 state->utt->
f.
set(
"language", attributes.
val(
"name"));
337 element_open(c, p, data, name, attributes);
338 element_close(c, p, data, name);
347 (void)c; (void)p; (void)name;
348 Parse_State *state = (Parse_State *)data;
350 if (strcmp(name,
"anaphora-elem")==0
351 || strcmp(name,
"wordlist")==0
352 || strcmp(name,
"w")==0)
355 state->current = state->parent;
356 state->parent=iup(state->parent);
368 Parse_State *state = (Parse_State *)data;
370 if (state->parent != NULL && p.
context(0) ==
"w")
382 (void)c; (void)p; (void)data; (void)chars;
392 const char *instruction)
395 Parse_State *state = (Parse_State *)data;
397 printf(
"SOLE XML Parser [proc[%s]] %d\n", instruction, state->depth);
405 (void)c; (void)p; (void)data;
408 EST_error(
"SOLE XML Parser %s",
get_error(p));
void set(const EST_String &name, int ival)
EST_Features f
General features for this item.
void set_name(const EST_String &s)
set name
void set(const EST_String &name, int ival)
static const EST_String Empty
Constant empty string.
V & val(const K &key, int &found) const
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
int present(const K &key) const
Does the key have an entry?
void begin(const Container &over)
Set the iterator ready to run over this container.
void append(const T &item)
add item onto end of list
EST_Features f
Utterance level features.
void clear()
remove everything in utterance
const char * get_error(XML_Parser &p)
Get the error message for the last error.
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
EST_String context(int n)
void track_context(bool flag)