Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
slib_str.cc
1/*
2 * COPYRIGHT (c) 1988-1994 BY *
3 * PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS. *
4 * See the source file SLIB.C for more information. *
5
6 * Reorganization of files (Mar 1999) by Alan W Black <awb@cstr.ed.ac.uk>
7
8 * String functions
9
10*/
11#include <cstdio>
12#include <cstring>
13#include <setjmp.h>
14#include <cstdlib>
15#include <cctype>
16
17#include "EST_Pathname.h"
18#include "EST_cutils.h"
19#include "siod.h"
20#include "siodp.h"
21
22LISP strintern(const char *data)
23{
24 return strcons(strlen(data),data);
25}
26
27LISP strcons(long length,const char *data)
28{long flag;
29 LISP s;
30 flag = no_interrupt(1);
31 s = cons(NIL,NIL);
32 s->type = tc_string;
33 s->storage_as.string.data = must_malloc(length+1);
34 s->storage_as.string.dim = length;
35 if (data)
36 memmove(s->storage_as.string.data,data,length+1);
37 no_interrupt(flag);
38 return(s);}
39
40LISP cstrcons(const char *data)
41{long flag;
42 LISP s;
43 flag = no_interrupt(1);
44 s = cons(NIL,NIL);
45 s->type = tc_string;
46 s->storage_as.string.data = (char *)(void *)data;
47 s->storage_as.string.dim = strlen(data);
48 no_interrupt(flag);
49 return(s);}
50
51static int rfs_getc(unsigned char **p)
52{int i;
53 i = **p;
54 if (!i) return(EOF);
55 *p = *p + 1;
56 return(i);}
57
58static void rfs_ungetc(unsigned char c,unsigned char **p)
59{(void)c;
60 *p = *p - 1;}
61
62LISP read_from_lstring(LISP x)
63{return read_from_string(get_c_string(x));}
64
65LISP read_from_string(const char *string)
66{char *p,*q;
67 LISP r;
68 struct gen_readio s;
69 q = wstrdup(string);
70 p = q;
71 s.getc_fcn = (int (*)(char *))rfs_getc;
72 s.ungetc_fcn = (void (*)(int, char *))rfs_ungetc;
73 s.cb_argument = (char *) &p;
74 r = readtl(&s);
75 wfree(q);
76 return r;
77}
78
79LISP string_append(LISP args)
80{long size;
81 LISP l,s;
82 char *data;
83 size = 0;
84 for(l=args;NNULLP(l);l=cdr(l))
85 size += strlen(get_c_string(car(l)));
86 s = strcons(size,NULL);
87 data = s->storage_as.string.data;
88 data[0] = 0;
89 for(l=args;NNULLP(l);l=cdr(l))
90 strcat(data,get_c_string(car(l)));
91 return(s);}
92
93LISP string_length(LISP string)
94{if NTYPEP(string,tc_string) err("not a string",string);
95 return(flocons((double)string->storage_as.string.dim));}
96
97LISP parse_number(LISP x)
98{const char *c;
99 c = get_c_string(x);
100 return(flocons(atof(c)));}
101
102LISP string_downcase(LISP symbol)
103{
104 const char *symname = get_c_string(symbol);
105 char *downsym = wstrdup(symname);
106 LISP newsym;
107 int i;
108
109 for (i=0; symname[i] != '\0'; i++)
110 if (isupper(symname[i]))
111 downsym[i] = tolower(symname[i]);
112 else
113 downsym[i] = symname[i];
114 downsym[i] = '\0';
115 newsym = strintern(downsym);
116 wfree(downsym);
117
118 return newsym;
119}
120
121LISP string_upcase(LISP symbol)
122{
123 const char *symname = get_c_string(symbol);
124 char *upsym = wstrdup(symname);
125 LISP newsym;
126 int i;
127
128 for (i=0; symname[i] != '\0'; i++)
129 if (islower(symname[i]))
130 upsym[i] = toupper(symname[i]);
131 else
132 upsym[i] = symname[i];
133 upsym[i] = '\0';
134 newsym = strintern(upsym);
135 wfree(upsym);
136
137 return newsym;
138}
139
140LISP path_is_dirname(LISP lpath)
141{
142 EST_Pathname path(get_c_string(lpath));
143
144 return path.is_dirname()?lpath:NIL;
145}
146
147LISP path_is_filename(LISP lpath)
148{
149 EST_Pathname path(get_c_string(lpath));
150
151 return path.is_filename()?lpath:NIL;
152}
153
154LISP path_as_directory(LISP lpath)
155{
156 EST_Pathname path(get_c_string(lpath));
157 EST_Pathname res(path.as_directory());
158 return strintern(res);
159}
160
161LISP path_as_file(LISP lpath)
162{
163 EST_Pathname path(get_c_string(lpath));
164 EST_Pathname res(path.as_file());
165
166 return strintern(res);
167}
168
169LISP path_append(LISP lpaths)
170{
171 if (CONSP(lpaths))
172 {
173 EST_Pathname res(get_c_string(car(lpaths)));
174 lpaths = cdr(lpaths);
175 while(lpaths != NIL)
176 {
177 res = res +get_c_string(car(lpaths));
178 lpaths = cdr(lpaths);
179 }
180 return strintern(res);
181 }
182 return NIL;
183}
184
185LISP path_basename(LISP lpath)
186{
187 EST_Pathname path(get_c_string(lpath));
188 EST_Pathname res(path.basename(1));
189
190 return strintern(res);
191}
192
193LISP symbol_basename(LISP path, LISP suffix)
194{
195 // Like UNIX basename
196 const char *pathstr = get_c_string(path);
197 const char *suff;
198 char *bname;
199 int i, j, k, start, end;
200 LISP newsym;
201
202 if (suffix == NIL)
203 suff = "";
204 else
205 suff = get_c_string(suffix);
206
207 for (i=strlen(pathstr); i >= 0; i--)
208 if (pathstr[i] == '/')
209 break;
210 start = i+1;
211 for (j=strlen(pathstr),k=strlen(suff); k >= 0; k--,j--)
212 if (pathstr[j] != suff[k])
213 break;
214 if (k != -1)
215 end = strlen(pathstr);
216 else
217 end = j+1;
218
219 bname = walloc(char,end-start+1);
220 memcpy(bname,&pathstr[start],end-start);
221 bname[end-start] = '\0';
222 newsym = strcons(strlen(bname),bname);
223 wfree(bname);
224
225 return newsym;
226}
227
228
229static LISP lisp_to_string(LISP l)
230{
231 EST_String s;
232
233 s = siod_sprint(l);
234 printf("%s\n",(const char *)s);
235 return strintern(s);
236}
237
238static LISP symbolconc(LISP args)
239{long size;
240 LISP l,s;
241 size = 0;
242 tkbuffer[0] = 0;
243 for(l=args;NNULLP(l);l=cdr(l))
244 {s = car(l);
245 if NSYMBOLP(s) err("wrong type of argument(non-symbol) to symbolconc",s);
246 size = size + strlen(PNAME(s));
247 if (size > TKBUFFERN) err("symbolconc buffer overflow",NIL);
248 strcat(tkbuffer,PNAME(s));}
249 return(rintern(tkbuffer));}
250
251LISP symbolexplode(LISP name)
252{
253 LISP e=NIL;
254 const char *pname = get_c_string(name);
255 char tt[2];
256 int i;
257
258 tt[1]='\0';
259
260 for (i=0; pname[i] != '\0'; i++)
261 {
262 tt[0] = pname[i];
263 e = cons(rintern(tt),e);
264 }
265 return reverse(e);
266}
267
268LISP l_matches(LISP atom, LISP regex)
269{
270 // t if printname of atom matches regex, nil otherwise
271 const EST_String pname = get_c_string(atom);
272
273 if (pname.matches(make_regex(get_c_string(regex))) == TRUE)
274 return truth;
275 else
276 return NIL;
277}
278
279LISP l_strequal(LISP atom1, LISP atom2)
280{
281
282 if (streq(get_c_string(atom1),get_c_string(atom2)))
283 return truth;
284 else
285 return NIL;
286}
287
288LISP l_substring(LISP string, LISP l_start, LISP l_length)
289{
290 // As string might actually be a buffer containing nulls we
291 // do this a little carefully.
292 if (NTYPEP(string,tc_string))
293 err("not a string",string);
294
295 const char *data = string->storage_as.string.data;
296 int dim = string->storage_as.string.dim;
297
298 int start = ( get_c_int(l_start) < dim ? get_c_int(l_start) : dim );
299 int length = ( (get_c_int(l_length) + start) < dim ?
300 get_c_int(l_length)
301 : dim-start
302 );
303
304 char *nbuffer = walloc(char, length+1);
305 memmove(nbuffer,data+start,length);
306 nbuffer[length] = '\0';
307
308 LISP ncell = strcons(length, nbuffer);
309
310 wfree(nbuffer);
311
312 return ncell;
313}
314
315static LISP l_sbefore(LISP atom, LISP before)
316{
317 // Wraparound for EST_String.before function
318 EST_String pname = get_c_string(atom);
319 EST_String b = get_c_string(before);
320 EST_String n = pname.before(b);
321
322 return strintern(n);
323}
324
325static LISP l_safter(LISP atom, LISP after)
326{
327 // Wraparound for EST_String.after function
328 EST_String pname = get_c_string(atom);
329 EST_String a = get_c_string(after);
330 EST_String n = pname.after(a);
331
332 return strintern(n);
333}
334
335void init_subrs_str(void)
336{
337 init_lsubr("string-append",string_append,
338 "(string-append STR1 STR2 ...)\n\
339 Return a string made from the concatenation of the print names of STR1\n\
340 STR2 ...");
341 init_subr_1("string-length",string_length,
342 "(string-length SYMBOL)\n\
343 Return the number of characters in the print name of SYMBOL.");
344 init_subr_1("print_string",lisp_to_string,
345 "(print_string DATA)\n\
346 Returns a string representing the printing of DATA." );
347 init_subr_1("read-from-string",read_from_lstring,
348 "(read-from-string SYMBOL)\n\
349 Return first s-expression in print name of SYMBOL.");
350 init_subr_1("downcase",string_downcase,
351 "(downcase SYMBOL)\n\
352 Returns a string with the downcased version of SYMBOL's printname.");
353 init_subr_1("upcase",string_upcase,
354 "(upcase SYMBOL)\n\
355 Returns a string with the upcased version of SYMBOL's printname.");
356 init_subr_2("string-matches",l_matches,
357 "(string-matches ATOM REGEX)\n\
358 Returns t if ATOM's printname matches the regular expression REGEX,\n\
359 otherwise it returns nil.");
360 init_subr_2("string-equal",l_strequal,
361 "(string-equal ATOM1 ATOM2)\n\
362 Returns t if ATOM's printname is equal to ATOM's print name, otherwise\n\
363 it returns nil.");
364 init_subr_3("substring", l_substring,
365 "(substring STRING START LENGTH)\n\
366 Return a substring of STRING starting at START of length LENGTH.");
367 init_subr_2("string-before",l_sbefore,
368 "(string-before ATOM BEFORE)\n\
369 Returns an atom whose printname is the substring of ATOM's printname \n\
370 which appears before BEFORE. This is a wraparound for the EST_String.before \n\
371 function in C++, and hence has the same conditions for boundary cases.");
372 init_subr_2("string-after",l_safter,
373 "(string-after ATOM AFTER)\n\
374 Returns an atom whose printname is the substring of ATOM's printname \n\
375 which appears after AFTER. This is a wraparound for the EST_String.after \n\
376 function in C++, and hence has the same conditions for boundary cases.");
377
378 init_lsubr("symbolconc",symbolconc,
379 "(symbolconc SYMBOL1 SYMBOL2 ...)\n\
380 Form new symbol by concatenation of the print forms of each of SYMBOL1\n\
381 SYMBOL2 etc.");
382 init_subr_1("symbolexplode",symbolexplode,
383 "(symbolexplode SYMBOL)\n\
384 Returns list of atoms one for each character in the print name of SYMBOL.");
385
386 init_subr_1("parse-number",parse_number,
387 "(parse-number SYMBOL)\n\
388 Returns a number form a symbol or string whose print name is a number.");
389
390 init_subr_2("basename",symbol_basename,
391 "(basename PATH SUFFIX)\n\
392 Return a string with directory removed from basename. If SUFFIX is\n\
393 specified remove that from end of PATH. Basically the same function\n\
394 as the UNIX command of the same name.");
395
396
397 init_subr_1("path-is-filename", path_is_filename,
398 "(path-is-filename PATHNAME)\n\
399 Is PATH a non-directory name.");
400
401 init_subr_1("path-as-directory", path_as_directory,
402 "(path-as-directory PATHNAME)\n\
403 Return PATH as a directory name.");
404
405 init_subr_1("path-as-file", path_as_file,
406 "(path-as-file PATHNAME)\n\
407 Return PATH as a non-directory name.");
408
409 init_lsubr("path-append", path_append,
410 "(path-append DIRECTORY-PATH ADDITION1 ADDITION2 ...)\n\
411 Return a the path for ADDITION in DIRECTORY.");
412
413 init_subr_1("path-basename", path_basename,
414 "(path-basename PATHNAME)\n\
415 Return name part of PATH.");
416
417
418 init_subr_1("path-is-dirname", path_is_dirname,
419 "(path-is-dirname PATHNAME)\n\
420 Is PATH a directory name.");
421
422}
EST_String before(int pos, int len=0) const
Part before position.
Definition EST_String.h:286
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition EST_String.h:318
int matches(const char *e, int pos=0) const
Exactly match this string?