PocketSphinx 5prealpha
ms_gauden.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#include <assert.h>
39#include <string.h>
40#include <math.h>
41#include <float.h>
42
43#include <sphinxbase/bio.h>
44#include <sphinxbase/err.h>
45#include <sphinxbase/ckd_alloc.h>
46
47#include "ms_gauden.h"
48
49#define GAUDEN_PARAM_VERSION "1.0"
50
51#ifndef M_PI
52#define M_PI 3.1415926535897932385e0
53#endif
54
55#define WORST_DIST (int32)(0x80000000)
56
57void
58gauden_dump(const gauden_t * g)
59{
60 int32 c;
61
62 for (c = 0; c < g->n_mgau; c++)
63 gauden_dump_ind(g, c);
64}
65
66
67void
68gauden_dump_ind(const gauden_t * g, int senidx)
69{
70 int32 f, d, i;
71
72 for (f = 0; f < g->n_feat; f++) {
73 E_INFO("Codebook %d, Feature %d (%dx%d):\n",
74 senidx, f, g->n_density, g->featlen[f]);
75
76 for (d = 0; d < g->n_density; d++) {
77 printf("m[%3d]", d);
78 for (i = 0; i < g->featlen[f]; i++)
79 printf(" %7.4f", MFCC2FLOAT(g->mean[senidx][f][d][i]));
80 printf("\n");
81 }
82 printf("\n");
83
84 for (d = 0; d < g->n_density; d++) {
85 printf("v[%3d]", d);
86 for (i = 0; i < g->featlen[f]; i++)
87 printf(" %d", (int)g->var[senidx][f][d][i]);
88 printf("\n");
89 }
90 printf("\n");
91
92 for (d = 0; d < g->n_density; d++)
93 printf("d[%3d] %d\n", d, (int)g->det[senidx][f][d]);
94 }
95 fflush(stderr);
96}
97
107static float ****
108gauden_param_read(const char *file_name,
109 int32 * out_n_mgau,
110 int32 * out_n_feat,
111 int32 * out_n_density,
112 int32 ** out_veclen)
113{
114 char tmp;
115 FILE *fp;
116 int32 i, j, k, l, n, blk;
117 int32 n_mgau;
118 int32 n_feat;
119 int32 n_density;
120 int32 *veclen;
121 int32 byteswap, chksum_present;
122 float32 ****out;
123 float32 *buf;
124 char **argname, **argval;
125 uint32 chksum;
126
127 E_INFO("Reading mixture gaussian parameter: %s\n", file_name);
128
129 if ((fp = fopen(file_name, "rb")) == NULL) {
130 E_ERROR_SYSTEM("Failed to open file '%s' for reading", file_name);
131 return NULL;
132 }
133
134 /* Read header, including argument-value info and 32-bit byteorder magic */
135 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) {
136 E_ERROR("Failed to read header from file '%s'\n", file_name);
137 fclose(fp);
138 return NULL;
139 }
140
141 /* Parse argument-value list */
142 chksum_present = 0;
143 for (i = 0; argname[i]; i++) {
144 if (strcmp(argname[i], "version") == 0) {
145 if (strcmp(argval[i], GAUDEN_PARAM_VERSION) != 0)
146 E_WARN("Version mismatch(%s): %s, expecting %s\n",
147 file_name, argval[i], GAUDEN_PARAM_VERSION);
148 }
149 else if (strcmp(argname[i], "chksum0") == 0) {
150 chksum_present = 1; /* Ignore the associated value */
151 }
152 }
153 bio_hdrarg_free(argname, argval);
154 argname = argval = NULL;
155
156 chksum = 0;
157
158 /* #Codebooks */
159 if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
160 E_ERROR("Failed to read number fo codebooks from %s\n", file_name);
161 fclose(fp);
162 return NULL;
163 }
164 *out_n_mgau = n_mgau;
165
166 /* #Features/codebook */
167 if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
168 E_ERROR("Failed to read number of features from %s\n", file_name);
169 fclose(fp);
170 return NULL;
171 }
172 *out_n_feat = n_feat;
173
174 /* #Gaussian densities/feature in each codebook */
175 if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
176 E_ERROR("fread(%s) (#density/codebook) failed\n", file_name);
177 }
178 *out_n_density = n_density;
179
180 /* #Dimensions in each feature stream */
181 veclen = ckd_calloc(n_feat, sizeof(uint32));
182 *out_veclen = veclen;
183 if (bio_fread(veclen, sizeof(int32), n_feat, fp, byteswap, &chksum) !=
184 n_feat) {
185 E_ERROR("fread(%s) (feature-lengths) failed\n", file_name);
186 fclose(fp);
187 return NULL;
188 }
189
190 /* blk = total vector length of all feature streams */
191 for (i = 0, blk = 0; i < n_feat; i++)
192 blk += veclen[i];
193
194 /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
195 if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
196 E_ERROR("Failed to read number of parameters from %s\n", file_name);
197 fclose(fp);
198 return NULL;
199 }
200
201 if (n != n_mgau * n_density * blk) {
202 E_ERROR
203 ("Number of parameters in %s(%d) doesn't match dimensions: %d x %d x %d\n",
204 file_name, n, n_mgau, n_density, blk);
205 fclose(fp);
206 return NULL;
207 }
208
209 /* Allocate memory for mixture gaussian densities if not already allocated */
210 out = (float32 ****) ckd_calloc_3d(n_mgau, n_feat, n_density,
211 sizeof(float32 *));
212 buf = (float32 *) ckd_calloc(n, sizeof(float32));
213 for (i = 0, l = 0; i < n_mgau; i++) {
214 for (j = 0; j < n_feat; j++) {
215 for (k = 0; k < n_density; k++) {
216 out[i][j][k] = &buf[l];
217 l += veclen[j];
218 }
219 }
220 }
221
222 /* Read mixture gaussian densities data */
223 if (bio_fread(buf, sizeof(float32), n, fp, byteswap, &chksum) != n) {
224 E_ERROR("Failed to read density data from file '%s'\n", file_name);
225 fclose(fp);
226 ckd_free_3d(out);
227 return NULL;
228 }
229
230 if (chksum_present)
231 bio_verify_chksum(fp, byteswap, chksum);
232
233 if (fread(&tmp, 1, 1, fp) == 1) {
234 E_ERROR("More data than expected in %s\n", file_name);
235 fclose(fp);
236 ckd_free_3d(out);
237 return NULL;
238 }
239
240 fclose(fp);
241
242 E_INFO("%d codebook, %d feature, size: \n", n_mgau, n_feat);
243 for (i = 0; i < n_feat; i++)
244 E_INFO(" %dx%d\n", n_density, veclen[i]);
245
246 return out;
247}
248
249static void
250gauden_param_free(mfcc_t **** p)
251{
252 ckd_free(p[0][0][0]);
253 ckd_free_3d(p);
254}
255
256/*
257 * Some of the gaussian density computation can be carried out in advance:
258 * log(determinant) calculation,
259 * 1/(2*var) in the exponent,
260 * NOTE; The density computation is performed in log domain.
261 */
262static int32
263gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor)
264{
265 int32 i, m, f, d, flen;
266 mfcc_t *meanp;
267 mfcc_t *varp;
268 mfcc_t *detp;
269 int32 floored;
270
271 floored = 0;
272 /* Allocate space for determinants */
273 g->det = ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density, sizeof(***g->det));
274
275 for (m = 0; m < g->n_mgau; m++) {
276 for (f = 0; f < g->n_feat; f++) {
277 flen = g->featlen[f];
278
279 /* Determinants for all variance vectors in g->[m][f] */
280 for (d = 0, detp = g->det[m][f]; d < g->n_density; d++, detp++) {
281 *detp = 0;
282 for (i = 0, varp = g->var[m][f][d], meanp = g->mean[m][f][d];
283 i < flen; i++, varp++, meanp++) {
284 float32 *fvarp = (float32 *)varp;
285
286#ifdef FIXED_POINT
287 float32 *fmp = (float32 *)meanp;
288 *meanp = FLOAT2MFCC(*fmp);
289#endif
290 if (*fvarp < varfloor) {
291 *fvarp = varfloor;
292 ++floored;
293 }
294 *detp += (mfcc_t)logmath_log(lmath,
295 1.0 / sqrt(*fvarp * 2.0 * M_PI));
296 /* Precompute this part of the exponential */
297 *varp = (mfcc_t)logmath_ln_to_log(lmath,
298 (1.0 / (*fvarp * 2.0)));
299 }
300 }
301 }
302 }
303
304 E_INFO("%d variance values floored\n", floored);
305
306 return 0;
307}
308
309
310gauden_t *
311gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
312{
313 int32 i, m, f, d, *flen;
314 gauden_t *g;
315
316 assert(meanfile != NULL);
317 assert(varfile != NULL);
318 assert(varfloor > 0.0);
319
320 g = (gauden_t *) ckd_calloc(1, sizeof(gauden_t));
321 g->lmath = lmath;
322
323 g->mean = (mfcc_t ****)gauden_param_read(meanfile, &g->n_mgau, &g->n_feat, &g->n_density,
324 &g->featlen);
325 if (g->mean == NULL) {
326 return NULL;
327 }
328 g->var = (mfcc_t ****)gauden_param_read(varfile, &m, &f, &d, &flen);
329 if (g->var == NULL) {
330 return NULL;
331 }
332
333 /* Verify mean and variance parameter dimensions */
334 if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density)) {
335 E_ERROR
336 ("Mixture-gaussians dimensions for means and variances differ\n");
337 ckd_free(flen);
338 gauden_free(g);
339 return NULL;
340 }
341 for (i = 0; i < g->n_feat; i++) {
342 if (g->featlen[i] != flen[i]) {
343 E_FATAL("Feature lengths for means and variances differ\n");
344 ckd_free(flen);
345 gauden_free(g);
346 return NULL;
347 }
348 }
349
350 ckd_free(flen);
351
352 gauden_dist_precompute(g, lmath, varfloor);
353
354 return g;
355}
356
357void
358gauden_free(gauden_t * g)
359{
360 if (g == NULL)
361 return;
362 if (g->mean)
363 gauden_param_free(g->mean);
364 if (g->var)
365 gauden_param_free(g->var);
366 if (g->det)
367 ckd_free_3d(g->det);
368 if (g->featlen)
369 ckd_free(g->featlen);
370 ckd_free(g);
371}
372
373/* See compute_dist below */
374static int32
375compute_dist_all(gauden_dist_t * out_dist, mfcc_t* obs, int32 featlen,
376 mfcc_t ** mean, mfcc_t ** var, mfcc_t * det,
377 int32 n_density)
378{
379 int32 i, d;
380
381 for (d = 0; d < n_density; ++d) {
382 mfcc_t *m;
383 mfcc_t *v;
384 mfcc_t dval;
385
386 m = mean[d];
387 v = var[d];
388 dval = det[d];
389
390 for (i = 0; i < featlen; i++) {
391 mfcc_t diff;
392#ifdef FIXED_POINT
393 /* Have to check for underflows here. */
394 mfcc_t pdval = dval;
395 diff = obs[i] - m[i];
396 dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]);
397 if (dval > pdval) {
398 dval = WORST_SCORE;
399 break;
400 }
401#else
402 diff = obs[i] - m[i];
403 /* The compiler really likes this to be a single
404 * expression, for whatever reason. */
405 dval -= diff * diff * v[i];
406#endif
407 }
408
409 out_dist[d].dist = dval;
410 out_dist[d].id = d;
411 }
412
413 return 0;
414}
415
416
417/*
418 * Compute the top-N closest gaussians from the chosen set (mgau,feat)
419 * for the given input observation vector.
420 */
421static int32
422compute_dist(gauden_dist_t * out_dist, int32 n_top,
423 mfcc_t * obs, int32 featlen,
424 mfcc_t ** mean, mfcc_t ** var, mfcc_t * det,
425 int32 n_density)
426{
427 int32 i, j, d;
428 gauden_dist_t *worst;
429
430 /* Special case optimization when n_density <= n_top */
431 if (n_top >= n_density)
432 return (compute_dist_all
433 (out_dist, obs, featlen, mean, var, det, n_density));
434
435 for (i = 0; i < n_top; i++)
436 out_dist[i].dist = WORST_DIST;
437 worst = &(out_dist[n_top - 1]);
438
439 for (d = 0; d < n_density; d++) {
440 mfcc_t *m;
441 mfcc_t *v;
442 mfcc_t dval;
443
444 m = mean[d];
445 v = var[d];
446 dval = det[d];
447
448 for (i = 0; (i < featlen) && (dval >= worst->dist); i++) {
449 mfcc_t diff;
450#ifdef FIXED_POINT
451 /* Have to check for underflows here. */
452 mfcc_t pdval = dval;
453 diff = obs[i] - m[i];
454 dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]);
455 if (dval > pdval) {
456 dval = WORST_SCORE;
457 break;
458 }
459#else
460 diff = obs[i] - m[i];
461 /* The compiler really likes this to be a single
462 * expression, for whatever reason. */
463 dval -= diff * diff * v[i];
464#endif
465 }
466
467 if ((i < featlen) || (dval < worst->dist)) /* Codeword d worse than worst */
468 continue;
469
470 /* Codeword d at least as good as worst so far; insert in the ordered list */
471 for (i = 0; (i < n_top) && (dval < out_dist[i].dist); i++);
472 assert(i < n_top);
473 for (j = n_top - 1; j > i; --j)
474 out_dist[j] = out_dist[j - 1];
475 out_dist[i].dist = dval;
476 out_dist[i].id = d;
477 }
478
479 return 0;
480}
481
482
483/*
484 * Compute distances of the input observation from the top N codewords in the given
485 * codebook (g->{mean,var}[mgau]). The input observation, obs, includes vectors for
486 * all features in the codebook.
487 */
488int32
489gauden_dist(gauden_t * g,
490 int mgau, int32 n_top, mfcc_t** obs, gauden_dist_t ** out_dist)
491{
492 int32 f;
493
494 assert((n_top > 0) && (n_top <= g->n_density));
495
496 for (f = 0; f < g->n_feat; f++) {
497 compute_dist(out_dist[f], n_top,
498 obs[f], g->featlen[f],
499 g->mean[mgau][f], g->var[mgau][f], g->det[mgau][f],
500 g->n_density);
501 E_DEBUG(3, ("Top CW(%d,%d) = %d %d\n", mgau, f, out_dist[f][0].id,
502 (int)out_dist[f][0].dist >> SENSCR_SHIFT));
503 }
504
505 return 0;
506}
507
508int32
509gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
510{
511 int32 i, m, f, d, *flen;
512
513 /* Free data if already here */
514 if (g->mean)
515 gauden_param_free(g->mean);
516 if (g->var)
517 gauden_param_free(g->var);
518 if (g->det)
519 ckd_free_3d(g->det);
520 if (g->featlen)
521 ckd_free(g->featlen);
522 g->det = NULL;
523 g->featlen = NULL;
524
525 /* Reload means and variances (un-precomputed). */
526 g->mean = (mfcc_t ****)gauden_param_read(cmd_ln_str_r(config, "_mean"), &g->n_mgau, &g->n_feat, &g->n_density,
527 &g->featlen);
528 g->var = (mfcc_t ****)gauden_param_read(cmd_ln_str_r(config, "_var"), &m, &f, &d, &flen);
529
530 /* Verify mean and variance parameter dimensions */
531 if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density))
532 E_FATAL
533 ("Mixture-gaussians dimensions for means and variances differ\n");
534 for (i = 0; i < g->n_feat; i++)
535 if (g->featlen[i] != flen[i])
536 E_FATAL("Feature lengths for means and variances differ\n");
537 ckd_free(flen);
538
539 /* Transform codebook for each stream s */
540 for (i = 0; i < g->n_mgau; ++i) {
541 for (f = 0; f < g->n_feat; ++f) {
542 float64 *temp;
543 temp = (float64 *) ckd_calloc(g->featlen[f], sizeof(float64));
544 /* Transform each density d in selected codebook */
545 for (d = 0; d < g->n_density; d++) {
546 int l;
547 for (l = 0; l < g->featlen[f]; l++) {
548 temp[l] = 0.0;
549 for (m = 0; m < g->featlen[f]; m++) {
550 /* FIXME: For now, only one class, hence the zeros below. */
551 temp[l] += mllr->A[f][0][l][m] * g->mean[i][f][d][m];
552 }
553 temp[l] += mllr->b[f][0][l];
554 }
555
556 for (l = 0; l < g->featlen[f]; l++) {
557 g->mean[i][f][d][l] = (float32) temp[l];
558 g->var[i][f][d][l] *= mllr->h[f][0][l];
559 }
560 }
561 ckd_free(temp);
562 }
563 }
564
565 /* Re-precompute (if we aren't adapting variances this isn't
566 * actually necessary...) */
567 gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
568 return 0;
569}
#define WORST_SCORE
Large "bad" score.
Definition hmm.h:84
#define SENSCR_SHIFT
Shift count for senone scores.
Definition hmm.h:73
(Sphinx 3.0 specific) Gaussian density module.
Structure to store distance (density) values for a given input observation wrt density values in some...
Definition ms_gauden.h:71
int32 id
Index of codeword (gaussian density)
Definition ms_gauden.h:72
mfcc_t dist
Density value for input observation wrt above codeword; NOTE: result in logs3 domain,...
Definition ms_gauden.h:73
Multivariate gaussian mixture density parameters.
Definition ms_gauden.h:82
mfcc_t **** var
like mean; diagonal covariance vector only
Definition ms_gauden.h:84
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
Definition ms_gauden.h:85
int32 n_feat
Number feature streams in each codebook.
Definition ms_gauden.h:89
mfcc_t **** mean
mean[codebook][feature][codeword] vector
Definition ms_gauden.h:83
int32 n_density
Number gaussian densities in each codebook-feature stream.
Definition ms_gauden.h:90
int32 * featlen
feature length for each feature
Definition ms_gauden.h:91
logmath_t * lmath
log math computation
Definition ms_gauden.h:87
int32 n_mgau
Number codebooks.
Definition ms_gauden.h:88
Feature space linear transform structure.
Definition acmod.h:82
float32 **** A
Rotation part of mean transformations.
Definition acmod.h:87
float32 *** b
Bias part of mean transformations.
Definition acmod.h:88
float32 *** h
Diagonal transformation of variances.
Definition acmod.h:89