Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_PST.h
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : July 1996 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* A general class for PredictionSuffixTrees */
38/* */
39/*=======================================================================*/
40
41#ifndef __PredictionSuffixTree_H__
42#define __PredictionSuffixTree_H__
43
44#include "EST_simplestats.h"
45#include "EST_types.h"
46#include "EST_Features.h"
47
49private:
50
51protected:
52
// Level value of this node: written by set_level(), read by get_level().
53 int p_level;
// State value of this node: written by set_state(), read by get_state().
54 int state;
// NOTE(review): the listing jumps from line 54 to 56; the `pd` member used by
// cumulate() and prob_dist() is presumably declared on the missing line.
// Path string of this node: written by set_path(), read by get_path().
56 EST_String path; /* context */
// Destroys a tree node that is held as an untyped pointer.
// n: pointer to an EST_PredictionSuffixTree_tree_node, or null.
// `delete` applied to a null pointer does nothing, so no separate null test
// is needed before it.
57 void delete_node(void *n) { delete static_cast<EST_PredictionSuffixTree_tree_node *>(n); }
58
59public:
60
// Public member `nodes`. The two commented-out lines are alternative
// declarations of the same member (a string trie, and a key/value list of
// node pointers) that are not compiled.
61// EST_StringTrie nodes;
62// EST_TKVL <EST_String, EST_PredictionSuffixTree_tree_node *> nodes;
63 EST_Features nodes;
// NOTE(review): the listing omits lines 64-65 here; presumably the
// constructor/destructor declarations -- confirm against the real header.
// Declared only; the definition is not part of this header.
66 void clear(void);
// Returns the stored path by const reference.
67 const EST_String &get_path(void) const {return path;}
// Assigns s to the stored path.
68 void set_path(const EST_String &s) {path=s;}
// Stores l as this node's level.
69 void set_level(int l) {p_level=l;}
// Stores s as this node's state.
70 void set_state(int s) {state=s;}
// Returns the stored state.
71 int get_state(void) const {return state;}
// Returns the stored level.
72 int get_level(void) const {return p_level;}
// Forwards the observation named s, with its count (default 1), to
// pd.cumulate().
73 void cumulate(const EST_String &s,double count=1) {pd.cumulate(s,count);}
// Same forwarding, with the observation identified by the integer i.
74 void cumulate(const int i,double count=1) {pd.cumulate(i,count);}
// Declared only. Returns a string by const reference and takes a double
// pointer p (presumably an out-parameter for the probability of the
// returned item -- confirm against the definition).
75 const EST_String &most_probable(double *p) const;
// Returns this node's distribution pd by const reference.
76 const EST_DiscreteProbDistribution &prob_dist() const {return pd;}
// Declared only; each takes an ostream reference.
77 void print_freqs(ostream &os);
78 void print_probs(ostream &os);
79};
80
// Macro invocation pairing the tag `pstnode` with the tree-node class. The
// macro is not defined in this header (presumably it declares EST_Val
// accessor functions for the class -- confirm in the EST_Val headers).
81VAL_REGISTER_CLASS_DCLS(pstnode,EST_PredictionSuffixTree_tree_node)
82
84
85private:
86
// File-format selector used as the type of save()'s `type` parameter.
// NOTE(review): this enum sits under `private:`, so code outside the class
// cannot name either value and can only rely on save()'s default argument.
87 enum EST_filetype {PredictionSuffixTree_ascii, PredictionSuffixTree_binary};
88
89protected:
90
// Value returned by order().
91 int p_order;
// Value returned by states().
92 int num_states;
// NOTE(review): the listing omits line 93; the `nodes` member passed to
// p_prob_dist() by prob_dist() is presumably declared there.
// samples() forwards to pd->samples().
94 EST_DiscreteProbDistribution *pd; // distribution of predictees
96 const EST_StrVector &words,
97 double *prob, int *state,
98 const int index=0) const;
99
// Protected helper, declared only. Takes a tree node, the word vector, a
// count, and an index into the words that defaults to 0.
// NOTE(review): presumably the worker behind the public accumulate() --
// confirm against the implementation file.
100 void p_accumulate(EST_PredictionSuffixTree_tree_node *node,
101 const EST_StrVector &words,
102 double count,
103 const int index=0);
104
// Protected helper, declared only; returns a distribution by const reference.
// NOTE(review): the listing omits line 106, which holds the first parameter.
// prob_dist() calls this as p_prob_dist(nodes, words), so the missing
// parameter is presumably the node to start from -- confirm.
105 const EST_DiscreteProbDistribution &p_prob_dist(
107 const EST_StrVector &words,
108 const int index=0) const;
109public:
// Constructs a tree by calling init(order).
111 EST_PredictionSuffixTree(const int order) {init(order);}
// Declared only; takes a file name by value (presumably loads the tree from
// that file -- confirm against the definition).
112 EST_PredictionSuffixTree(const EST_String filename);
// Declared only; takes a vocabulary list and an order that defaults to 2.
113 EST_PredictionSuffixTree(const EST_TList<EST_String> &vocab,int order=2);
// Declared only; the definitions are not part of this header.
115 void clear(void);
116 void init(const int order);
// Returns pd->samples(); pd is dereferenced without a null check.
117 double samples() const { return pd->samples(); }
// Returns num_states.
118 int states() const { return num_states; }
// Returns p_order.
119 int order(void) const {return p_order;}
// Declared only. Takes the word vector, a count (default 1) and an index
// into the words (default 0).
120 void accumulate(const EST_StrVector &words,const double count=1,const int index=0);
121
// Both are declared only and return an int whose meaning is not shown in
// this header. Both take the file name by value.
122 int load(const EST_String filename);
// `type` defaults to PredictionSuffixTree_ascii. EST_filetype is declared
// in the private section of this class.
123 int save(const EST_String filename,const EST_PredictionSuffixTree::EST_filetype type=PredictionSuffixTree_ascii);
124
125 // build EST_PredictionSuffixTree from train data
// Declared only. Takes a file name plus three strings named prev, prev_prev
// and last, all by value; their roles are not shown in this header.
126 void build(const EST_String filename,
127 const EST_String prev,
128 const EST_String prev_prev,
129 const EST_String last);
130
// Overload taking a string list; the original author marked it "to go".
131 void build(const EST_StrList &input); // to go
132
// All three are declared only. test() takes a file name by value; the two
// print functions take an ostream reference.
133 void test(const EST_String filename); // test EST_PredictionSuffixTree against test data
134 void print_freqs(ostream &os);
135 void print_probs(ostream &os);
136
// Three predict() overloads, declared only. Each takes the word vector and
// returns a string by const reference. The second adds a double pointer
// `prob` and the third also adds an int pointer `state` (presumably
// out-parameters -- confirm against the definitions).
137 const EST_String &predict(const EST_StrVector &words) const;
138 const EST_String &predict(const EST_StrVector &words,double *prob) const;
139 const EST_String &predict(const EST_StrVector &words,double *prob,int *state) const;
// Returns the result of p_prob_dist(nodes, words), leaving that helper's
// index argument at its default of 0.
140 const EST_DiscreteProbDistribution &prob_dist(const EST_StrVector &words)
141 const
142 {return p_prob_dist(nodes,words);}
143 /* Reverse probability, given X what is prob of EST_PredictionSuffixTree Y */
// Both overloads are declared only and return a double.
144 double rev_prob(const EST_StrVector &words) const;
// In this overload the parameter `pd` has the same name as the member `pd`
// and hides it within the function.
145 double rev_prob(const EST_StrVector &words,
146 const EST_DiscreteProbDistribution &pd) const;
147 /* print frequency or probability models */
148 /* build model from file */
149 /* predict and measure success */
150
151};
152
153#endif // __PredictionSuffixTree_H__
double samples(void) const
Total number of examples found.
void cumulate(const EST_String &s, double count=1)
Add this observation, may specify number of occurrences.