Skip to content

Commit

Permalink
sandbox/online: committing various changes in src/, mostly relating t…
Browse files Browse the repository at this point in the history
…o online estimation of iVectors which I intend to use for online neural net decoding.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4145 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
  • Loading branch information
danpovey committed Jul 13, 2014
1 parent 0291ad2 commit 104c60d
Show file tree
Hide file tree
Showing 17 changed files with 564 additions and 69 deletions.
1 change: 1 addition & 0 deletions src/bin/copy-gselect.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ int main(int argc, char *argv[]) {
const char *usage =
"Copy Gaussian indices for pruning, possibly making the\n"
"lists shorter (e.g. the --n=10 limits to the 10 best indices\n"
"See also gmm-gselect, fgmm-gselect\n"
"Usage: \n"
" copy-gselect [options] <gselect-rspecifier> <gselect-wspecifier>\n";

Expand Down
1 change: 1 addition & 0 deletions src/fgmmbin/fgmm-global-gselect-to-post.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ int main(int argc, char *argv[]) {
"a full-covariance GMM, output per-frame posteriors for the selected\n"
"indices. Also supports pruning the posteriors if they are below\n"
"a stated threshold, (and renormalizing the rest to sum to one)\n"
"See also: gmm-gselect, fgmm-gselect, gmm-global-gest-post\n"
"\n"
"Usage: fgmm-global-gselect-to-post [options] <model-in> <feature-rspecifier> "
"<gselect-rspecifier> <post-wspecifier>\n"
Expand Down
1 change: 1 addition & 0 deletions src/fgmmbin/fgmm-gselect.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ int main(int argc, char *argv[]) {
" (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
" For each frame, gives a list of the n best Gaussian indices,\n"
" sorted from best to worst.\n"
"See also: gmm-gselect, copy-gselect, fgmm-gselect-to-post\n"
"Usage: \n"
" fgmm-gselect [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
"The --gselect option (which takes an rspecifier) limits selection to a subset\n"
Expand Down
2 changes: 1 addition & 1 deletion src/gmmbin/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
gmm-global-info gmm-latgen-faster-regtree-fmllr gmm-est-fmllr-global \
gmm-acc-mllt-global gmm-transform-means-global
gmm-acc-mllt-global gmm-transform-means-global gmm-global-get-post

OBJFILES =

Expand Down
170 changes: 170 additions & 0 deletions src/gmmbin/gmm-global-get-post.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// gmmbin/gmm-global-get-post.cc

// Copyright 2009-2011 Saarland University; Microsoft Corporation
// 2013-2014 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/diag-gmm.h"
#include "hmm/posterior.h"

namespace kaldi {

// Comparator functor for ordering (index, posterior) pairs from greatest to
// least posterior.
//
// It plays the role of the "<" predicate expected by std::sort and
// std::nth_element, but is implemented as ">" on the .second field, so that
// the resulting order is descending in posterior.  The .first field (the
// index) is ignored, so entries with equal posteriors compare as equivalent.
struct CompareReverseSecond {
  // Returns true if "a" should be placed before "b", i.e. if a's posterior
  // is strictly greater than b's.  Declared const so the comparator can also
  // be invoked through a const object or reference.
  bool operator() (const std::pair<int32, BaseFloat> &a,
                   const std::pair<int32, BaseFloat> &b) const {
    return (a.second > b.second);
  }
};


}

int main(int argc, char *argv[]) {
try {
using namespace kaldi;
using std::vector;
typedef kaldi::int32 int32;
const char *usage =
"Precompute Gaussian indices and convert immediately to top-n\n"
"posteriors (useful in iVector extraction with diagonal UBMs)\n"
"See also: gmm-gselect, fgmm-gselect, fgmm-global-gselect-to-post\n"
" (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
" For each frame, gives a list of the n best Gaussian indices,\n"
" sorted from best to worst.\n"
"Usage: \n"
" gmm-global-get-post [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
"e.g.: gmm-global-get-post --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >post.1.gz\"\n";

ParseOptions po(usage);
int32 num_post = 50;
BaseFloat min_post = 0.0;
po.Register("n", &num_post, "Number of Gaussians to keep per frame\n");
po.Register("min-post", &min_post, "Minimum posterior we will output "
"before pruning and renormalizing (e.g. 0.01)");
po.Read(argc, argv);

if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}

std::string model_filename = po.GetArg(1),
feature_rspecifier = po.GetArg(2),
post_wspecifier = po.GetArg(3);

DiagGmm gmm;
ReadKaldiObject(model_filename, &gmm);
KALDI_ASSERT(num_post > 0);
KALDI_ASSERT(min_post < 1.0);
int32 num_gauss = gmm.NumGauss();
if (num_post > num_gauss) {
KALDI_WARN << "You asked for " << num_post << " Gaussians but GMM "
<< "only has " << num_gauss << ", returning this many. ";
num_post = num_gauss;
}

double tot_like = 0.0;
kaldi::int64 tot_t = 0;

SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
PosteriorWriter post_writer(post_wspecifier);

int32 num_done = 0, num_err = 0;
for (; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
const Matrix<BaseFloat> &feats = feature_reader.Value();
int32 T = feats.NumRows();
if (T == 0) {
KALDI_WARN << "Empty features for utterance " << utt;
num_err++;
continue;
}
if (feats.NumCols() != gmm.Dim()) {
KALDI_WARN << "Dimension mismatch for utterance " << utt
<< ": got " << feats.NumCols() << ", expected " << gmm.Dim();
num_err++;
continue;
}
vector<vector<int32> > gselect(T);

Matrix<BaseFloat> loglikes;

gmm.LogLikelihoods(feats, &loglikes);

Posterior post(T);

double log_like_this_file = 0.0;
for (int32 t = 0; t < T; t++) {
SubVector<BaseFloat> loglikes_row(loglikes, t);
log_like_this_file += loglikes_row.ApplySoftMax();
std::vector<std::pair<int32, BaseFloat> > temp_post(num_gauss);
for (int32 g = 0; g < num_gauss; g++)
temp_post[g] = std::pair<int32, BaseFloat>(g, loglikes_row(g));
CompareReverseSecond compare;
// sort in decreasing order on posterior. actually, for efficiency we
// first do nth_element and then sort, as we only need the part we're
// going to output, to be sorted.
std::nth_element(temp_post.begin(),
temp_post.begin() + num_post, temp_post.end(),
compare);
std::sort(temp_post.begin(), temp_post.begin() + num_post,
compare);

std::vector<std::pair<int32, BaseFloat> > *output_post = &(post[t]);
output_post->insert(output_post->end(),
temp_post.begin(), temp_post.begin() + num_post);
while (output_post->size() > 1 && output_post->back().second < min_post)
post[t].pop_back();
// Now renormalize.
BaseFloat tot = 0.0;
size_t size = output_post->size();
for (size_t i = 0; i < size; i++)
tot += (*output_post)[i].second;
BaseFloat inv_tot = 1.0 / tot;
for (size_t i = 0; i < size; i++)
(*output_post)[i].second *= inv_tot;
}
KALDI_VLOG(1) << "Processed utterance " << utt << ", average likelihood "
<< (log_like_this_file / T) << " over " << T << " frames";
tot_like += log_like_this_file;
tot_t += T;

post_writer.Write(utt, post);
num_done++;
}

KALDI_LOG << "Done " << num_done << " files, " << num_err
<< " with errors, average UBM log-likelihood is "
<< (tot_like/tot_t) << " over " << tot_t << " frames.";

if (num_done != 0) return 0;
else return 1;
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}


5 changes: 3 additions & 2 deletions src/gmmbin/gmm-gselect.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ int main(int argc, char *argv[]) {
" (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
" For each frame, gives a list of the n best Gaussian indices,\n"
" sorted from best to worst.\n"
"See also: gmm-global-get-post, fgmm-global-gselect-to-post,\n"
"copy-gselect, fgmm-gselect\n"
"Usage: \n"
" gmm-gselect [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
"The --gselect option (which takes an rspecifier) limits selection to a subset\n"
"of indices:\n"
"e.g.: gmm-gselect \"--gselect=ark:gunzip -c bigger.gselect.gz|\" --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >1.gselect.gz\"\n";
"e.g.: gmm-gselect \"--gselect=ark:gunzip -c bigger.gselect.gz|\" --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >gselect.1.gz\"\n";

ParseOptions po(usage);
int32 num_gselect = 50;
Expand All @@ -64,7 +66,6 @@ int main(int argc, char *argv[]) {
ReadKaldiObject(model_filename, &gmm);
KALDI_ASSERT(num_gselect > 0);
int32 num_gauss = gmm.NumGauss();
KALDI_ASSERT(num_gauss);
if (num_gselect > num_gauss) {
KALDI_WARN << "You asked for " << num_gselect << " Gaussians but GMM "
<< "only has " << num_gauss << ", returning this many. "
Expand Down
12 changes: 12 additions & 0 deletions src/hmm/posterior.cc
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ void ScalePosterior(BaseFloat scale, Posterior *post) {
}
}

// Adds up the weight (the .second member) of every entry on every frame of
// "post" and returns the grand total.  The accumulation is done in double
// precision and converted to BaseFloat on return.
BaseFloat TotalPosterior(const Posterior &post) {
  typedef std::vector<std::pair<int32, BaseFloat> > FrameEntries;
  double total = 0.0;
  const size_t num_frames = post.size();
  for (size_t frame = 0; frame != num_frames; ++frame) {
    const FrameEntries &entries = post[frame];
    // Walk the entries front to back, i.e. in the order they are stored.
    for (FrameEntries::const_iterator it = entries.begin();
         it != entries.end(); ++it)
      total += it->second;
  }
  return total;
}

bool PosteriorEntriesAreDisjoint(
const std::vector<std::pair<int32,BaseFloat> > &post_elem1,
const std::vector<std::pair<int32,BaseFloat> > &post_elem2) {
Expand Down
3 changes: 3 additions & 0 deletions src/hmm/posterior.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
/// Scales the BaseFloat (weight) element in the posterior entries.
void ScalePosterior(BaseFloat scale, Posterior *post);

/// Returns the total of all the weights in "post", i.e. the sum of the
/// BaseFloat (.second) element of every entry, taken over all frames.
/// Returns zero if "post" contains no entries.
BaseFloat TotalPosterior(const Posterior &post);

/// Returns true if the two lists of pairs have no common .first element.
bool PosteriorEntriesAreDisjoint(
Expand Down Expand Up @@ -156,6 +158,7 @@ void AlignmentToPosterior(const std::vector<int32> &ali,
void SortPosteriorByPdfs(const TransitionModel &tmodel,
Posterior *post);


/// Converts a posterior over transition-ids to be a posterior
/// over pdf-ids.
void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
Expand Down
4 changes: 2 additions & 2 deletions src/ivector/ivector-extractor-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ void TestIvectorExtraction(const IvectorExtractor &extractor,

extractor.GetIvectorDistribution(utt_stats, &ivector1, NULL);

online_stats.GetIvector(&ivector2);
online_stats.GetIvector(-1, &ivector2);

KALDI_ASSERT(ivector1.ApproxEqual(ivector2));
}


void UnitTestIvectorExtractor() {
FullGmm fgmm;
Expand Down
Loading

0 comments on commit 104c60d

Please sign in to comment.