Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_Regex.h
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33
34#ifndef __EST_REGEX_H__
35#define __EST_REGEX_H__
36
37class EST_Regex;
38
39#include "EST_String.h"
40
41/** A Regular expression class to go with the CSTR EST_String class.
42 *
43 * The regular expression syntax is the FSF syntax used in emacs and
44 * in the FSF String library. This is translated into the syntax supported
45 * by Henry Spensor's regular expression library, this translation is a place
46 * to look if you find regular expressions not matching where expected.
47 *
48 * @see EST_String
49 * @see string_example
50 * @author Richard Caley <rjc@cstr.ed.ac.uk>
51 * @author (regular expression library by Henry Spencer, University of Toronto)
52 * @version $Id: EST_Regex.h,v 1.3 2004/05/04 00:00:16 awb Exp $
53 */
54
55class EST_Regex : protected EST_String {
56
57private:
58 /// The compiled form.
59 void *compiled;
60 /// Compiled form for whole string match.
61 void *compiled_match;
62
63protected:
64 /// Compile expression.
65 void compile();
66 /// Compile expression in a form which only matches whole string.
67 void compile_match();
68 /// Translate the expression into the internally used syntax.
69 char *regularize(int match) const;
70
71public:
72 /// Empty constructor, just for form.
73 EST_Regex(void);
74
75 /// Construct from EST_String.
77
78 /// Construct from C string.
79 EST_Regex(const char *ex);
80
81 /// Copy constructor.
82 EST_Regex(const EST_Regex &ex);
83
84 /// Destructor.
85 ~EST_Regex();
86
87 /// Size of the expression.
88 int size() const { return EST_String::size; };
89
90 /// Run to find a matching substring
91 int run(const char *on, int from, int &start, int &end, int *starts=NULL, int *ends=NULL);
92 /// Run to see if it matches the entire string.
93 int run_match(const char *on, int from=0, int *starts=NULL, int *ends=NULL);
94
95 /// Get the expression as a string.
96 EST_String tostring(void) const {return (*this);};
97
98 /// Cast operator, disambiguates it for some compilers
99 operator const char *() const { return (const char *)tostring(); }
100
101 int operator == (const EST_Regex ex) const
102 { return (const EST_String)*this == (const EST_String)ex; }
103
104 int operator != (const EST_Regex ex) const
105 { return (const EST_String)*this != (const EST_String)ex; }
106
107 /**@name Assignment */
108 //@{
109 ///
110 EST_Regex &operator = (const EST_Regex ex);
111 ///
112 EST_Regex &operator = (const EST_String s);
113 ///
114 EST_Regex &operator = (const char *s);
115 //@}
116
117 /// Stream output of regular expression.
118 friend ostream &operator << (ostream &s, const EST_Regex &str);
119};
120
121ostream &operator << (ostream &s, const EST_Regex &str);
122
123/**@name Predefined_regular_expressions
124 * Some regular expressions matching common things are predefined
125 */
126//@{
127/// White space
128extern EST_Regex RXwhite; // "[ \n\t\r]+"
129/// Sequence of alphabetic characters.
130extern EST_Regex RXalpha; // "[A-Za-z]+"
131/// Sequence of lower case alphabetic characters.
132extern EST_Regex RXlowercase; // "[a-z]+"
133/// Sequence of upper case alphabetic characters.
134extern EST_Regex RXuppercase; // "[A-Z]+"
135/// Sequence of letters and/or digits.
136extern EST_Regex RXalphanum; // "[0-9A-Za-z]+"
137/// Initial letter or underscore followed by letters underscores or digits.
138extern EST_Regex RXidentifier; // "[A-Za-z_][0-9A-Za-z_]+"
139/// Integer.
140extern EST_Regex RXint; // "-?[0-9]+"
141/// Floating point number.
142extern EST_Regex RXdouble; // "-?\\‍(\\‍([0-9]+\\.[0-9]*\\‍)\\|\\‍([0-9]+\\‍)\\|\\‍(\\.[0-9]+\\‍)\\‍)\\‍([eE][---+]?[0-9]+\\‍)?"
143//@}
144
// GCC lets us use the static constant to declare arrays, Sun CC
// doesn't, so for a quiet, if ugly, life we declare it here with a suitable
// value and check in EST_Regex.cc to make sure it's OK.

#define EST_Regex_max_subexpressions 10
150
151#endif
char * regularize(int match) const
Translate the expression into the internally used syntax.
Definition EST_Regex.cc:152
EST_Regex(void)
Empty constructor, just for form.
Definition EST_Regex.cc:111
int run(const char *on, int from, int &start, int &end, int *starts=NULL, int *ends=NULL)
Run to find a matching substring.
Definition EST_Regex.cc:242
void compile()
Compile expression.
Definition EST_Regex.cc:213
friend ostream & operator<<(ostream &s, const EST_Regex &str)
Stream output of regular expression.
Definition EST_Regex.cc:330
void compile_match()
Compile expression in a form which only matches whole string.
Definition EST_Regex.cc:227
EST_String tostring(void) const
Get the expression as a string.
Definition EST_Regex.h:96
int run_match(const char *on, int from=0, int *starts=NULL, int *ends=NULL)
Run to see if it matches the entire string.
Definition EST_Regex.cc:275
int size() const
Size of the expression.
Definition EST_Regex.h:88
~EST_Regex()
Destructor.
Definition EST_Regex.cc:140
EST_String(void)
Construct an empty string.
Definition EST_String.h:201
const char * str(void) const
Get a const-pointer to the actual memory.
Definition EST_String.h:245