Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
hash_regression.cc
1
2 /************************************************************************/
3 /* */
4 /* Centre for Speech Technology Research */
5 /* University of Edinburgh, UK */
6 /* Copyright (c) 1996,1997 */
7 /* All Rights Reserved. */
8 /* */
9 /* Permission is hereby granted, free of charge, to use and distribute */
10 /* this software and its documentation without restriction, including */
11 /* without limitation the rights to use, copy, modify, merge, publish, */
12 /* distribute, sublicense, and/or sell copies of this work, and to */
13 /* permit persons to whom this work is furnished to do so, subject to */
14 /* the following conditions: */
15 /* 1. The code must retain the above copyright notice, this list of */
16 /* conditions and the following disclaimer. */
17 /* 2. Any modifications must be clearly marked as such. */
18 /* 3. Original authors' names are not deleted. */
19 /* 4. The authors' names are not used to endorse or promote products */
20 /* derived from this software without specific prior written */
21 /* permission. */
22 /* */
23 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
24 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
25 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
26 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
27 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
28 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
29 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
30 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
31 /* THIS SOFTWARE. */
32 /* */
33 /************************************************************************/
34 /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35 /* Date: Wed Apr 9 1997 */
36 /************************************************************************/
37 /* */
38 /* Simple inverted index as a test of the hash type. */
39 /* */
40 /************************************************************************/
41
42#include <iostream>
43#include <fstream>
44#include "EST_String.h"
45#include "EST_Token.h"
46#include "EST_THash.h"
47
48#define LINE_LENGTH 1000
49
50EST_Regex RX_Word("[A-Z]?[a-z]+\\('[a-z]+\\)?");
51
52#define WORD "Latitude"
53
54int
55main(int argc, const char *argv[])
56{
57 EST_TStringHash<int> places(10);
58 int line_no = 1;
59 EST_TokenStream file;
60
61 if (argc != 2)
62 return 1;
63
64 file.open(argv[1]);
65 file.set_WhiteSpaceChars("");
66 file.set_SingleCharSymbols("\n");
69
70while(! file.eof())
71 {
72 EST_String line;
73
74 line = (EST_String)file.get();
75
76 if (file.eof())
77 break;
78
79 if (line == "\n")
80 line_no++;
81
82 int p=0, len;
83
84 while((p = line.search(RX_Word, len, p)) >= 0)
85 {
86 EST_String word(line.at(p, len));
87
88 places.add_item(word, line_no);
89 p += len;
90 }
91 }
92
93cout << WORD " is on line " << places.val(WORD) << "\n";
94
95places.dump(cout);
96
97return 0;
98}
99
int search(const char *s, int len, int &mlen, int pos=0) const
Find a substring.
Definition EST_String.h:334
EST_String at(int from, int len=0) const
Return part at position.
Definition EST_String.h:302
int eof()
end of file
Definition EST_Token.h:356
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition EST_Token.h:338
void set_PrePunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (pre) punctuation
Definition EST_Token.h:344
void set_PunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (post) punctuation
Definition EST_Token.h:341
int open(const EST_String &filename)
open an EST_TokenStream for a file.
Definition EST_Token.cc:200
void set_WhiteSpaceChars(const EST_String &ws)
set which characters are to be treated as whitespace
Definition EST_Token.h:335
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition EST_Token.cc:486