sandbox/online: committing various changes in src/, mostly relating to online estimation of iVectors which I intend to use for online neural net decoding.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4145 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
jtrmal · Jul 13, 2014 · 104c60d · 104c60d
1 parent 0291ad2
commit 104c60d
Show file tree

Hide file tree

Showing 17 changed files with 564 additions and 69 deletions.
diff --git a/src/bin/copy-gselect.cc b/src/bin/copy-gselect.cc
@@ -31,6 +31,7 @@ int main(int argc, char *argv[]) {
     const char *usage =
         "Copy Gaussian indices for pruning, possibly making the\n"
         "lists shorter (e.g. the --n=10 limits to the 10 best indices\n"
+        "See also gmm-gselect, fgmm-gselect\n"
         "Usage: \n"
         " copy-gselect [options] <gselect-rspecifier> <gselect-wspecifier>\n";
 

diff --git a/src/fgmmbin/fgmm-global-gselect-to-post.cc b/src/fgmmbin/fgmm-global-gselect-to-post.cc
@@ -35,6 +35,7 @@ int main(int argc, char *argv[]) {
         "a full-covariance GMM, output per-frame posteriors for the selected\n"
         "indices.  Also supports pruning the posteriors if they are below\n"
         "a stated threshold, (and renormalizing the rest to sum to one)\n"
+        "See also: gmm-gselect, fgmm-gselect, gmm-global-gest-post\n"
         "\n"
         "Usage:  fgmm-global-gselect-to-post [options] <model-in> <feature-rspecifier> "
         "<gselect-rspecifier> <post-wspecifier>\n"

diff --git a/src/fgmmbin/fgmm-gselect.cc b/src/fgmmbin/fgmm-gselect.cc
@@ -33,6 +33,7 @@ int main(int argc, char *argv[]) {
         " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
         " For each frame, gives a list of the n best Gaussian indices,\n"
         " sorted from best to worst.\n"
+        "See also: gmm-gselect, copy-gselect, fgmm-gselect-to-post\n"
         "Usage: \n"
         " fgmm-gselect [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
         "The --gselect option (which takes an rspecifier) limits selection to a subset\n"

diff --git a/src/gmmbin/Makefile b/src/gmmbin/Makefile
@@ -27,7 +27,7 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
            gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
            gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
            gmm-global-info gmm-latgen-faster-regtree-fmllr gmm-est-fmllr-global \
-           gmm-acc-mllt-global gmm-transform-means-global
+           gmm-acc-mllt-global gmm-transform-means-global gmm-global-get-post
 
 OBJFILES =
 

diff --git a/src/gmmbin/gmm-global-get-post.cc b/src/gmmbin/gmm-global-get-post.cc
@@ -0,0 +1,170 @@
+// gmmbin/gmm-global-get-post.cc
+
+// Copyright 2009-2011   Saarland University;  Microsoft Corporation
+//           2013-2014   Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "gmm/diag-gmm.h"
+#include "hmm/posterior.h"
+
+namespace kaldi {
+
+// Comparator object used to order (index, posterior) pairs from greatest to
+// least posterior.
+struct CompareReverseSecond {
+  // Plays the role of the "<" operator expected by the standard sorting
+  // algorithms, but is implemented as ">" on the .second field of the pair so
+  // that the resulting order is descending in posterior.  The .first field
+  // (the index) is ignored.  Declared const so that the comparator can also be
+  // invoked through a const object or const reference.
+  bool operator() (const std::pair<int32, BaseFloat> &a,
+                   const std::pair<int32, BaseFloat> &b) const {
+    return (a.second > b.second);
+  }
+};
+
+
+}  // namespace kaldi
+
+// Entry point of gmm-global-get-post.
+//
+// Reads a diagonal GMM (argument 1) and a table of feature matrices
+// (argument 2).  For every frame it computes the per-Gaussian posteriors,
+// keeps the --n largest, drops trailing entries that fall below --min-post
+// (always keeping at least one), renormalizes what is left to sum to one, and
+// writes one Posterior per utterance to the table given as argument 3.
+//
+// Returns 0 if at least one utterance was written, 1 if none was, and -1 if
+// an exception was caught; exits with status 1 on a usage error.
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    const char *usage =
+        "Precompute Gaussian indices and convert immediately to top-n\n"
+        "posteriors (useful in iVector extraction with diagonal UBMs)\n"
+        "See also: gmm-gselect, fgmm-gselect, fgmm-global-gselect-to-post\n"
+        " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
+        " For each frame, gives a list of the n best Gaussian indices\n"
+        " and their posteriors, sorted from best to worst.\n"
+        "Usage: \n"
+        " gmm-global-get-post [options] <model-in> <feature-rspecifier> <post-wspecifier>\n"
+        "e.g.: gmm-global-get-post --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >post.1.gz\"\n";
+
+    ParseOptions po(usage);
+    int32 num_post = 50;
+    BaseFloat min_post = 0.0;
+    po.Register("n", &num_post, "Number of Gaussians to keep per frame\n");
+    po.Register("min-post", &min_post, "Minimum posterior we will output "
+                "before pruning and renormalizing (e.g. 0.01)");
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_filename = po.GetArg(1),
+        feature_rspecifier = po.GetArg(2),
+        post_wspecifier = po.GetArg(3);
+
+    DiagGmm gmm;
+    ReadKaldiObject(model_filename, &gmm);
+    KALDI_ASSERT(num_post > 0);
+    KALDI_ASSERT(min_post < 1.0);
+    int32 num_gauss = gmm.NumGauss();
+    // We cannot output more posteriors per frame than there are Gaussians.
+    if (num_post > num_gauss) {
+      KALDI_WARN << "You asked for " << num_post << " Gaussians but GMM "
+                 << "only has " << num_gauss << ", returning this many. ";
+      num_post = num_gauss;
+    }
+
+    double tot_like = 0.0;
+    kaldi::int64 tot_t = 0;
+
+    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
+    PosteriorWriter post_writer(post_wspecifier);
+
+    // Scratch space reused for every frame: each of its num_gauss entries is
+    // overwritten before being read, so it is safe to allocate it only once.
+    std::vector<std::pair<int32, BaseFloat> > temp_post(num_gauss);
+    CompareReverseSecond compare;
+
+    int32 num_done = 0, num_err = 0;
+    for (; !feature_reader.Done(); feature_reader.Next()) {
+      std::string utt = feature_reader.Key();
+      const Matrix<BaseFloat> &feats = feature_reader.Value();
+      int32 T = feats.NumRows();
+      if (T == 0) {
+        KALDI_WARN << "Empty features for utterance " << utt;
+        num_err++;
+        continue;
+      }
+      if (feats.NumCols() != gmm.Dim()) {
+        KALDI_WARN << "Dimension mismatch for utterance " << utt
+                   << ": got " << feats.NumCols() << ", expected " << gmm.Dim();
+        num_err++;
+        continue;
+      }
+
+      Matrix<BaseFloat> loglikes;
+      gmm.LogLikelihoods(feats, &loglikes);
+
+      Posterior post(T);
+
+      double log_like_this_file = 0.0;
+      for (int32 t = 0; t < T; t++) {
+        SubVector<BaseFloat> loglikes_row(loglikes, t);
+        // The return value is accumulated as this frame's log-likelihood;
+        // from here on the row's contents are treated as posteriors.
+        log_like_this_file += loglikes_row.ApplySoftMax();
+        for (int32 g = 0; g < num_gauss; g++)
+          temp_post[g] = std::pair<int32, BaseFloat>(g, loglikes_row(g));
+        // sort in decreasing order on posterior.  actually, for efficiency we
+        // first do nth_element and then sort, as we only need the part we're
+        // going to output, to be sorted.
+        std::nth_element(temp_post.begin(),
+                         temp_post.begin() + num_post, temp_post.end(),
+                         compare);
+        std::sort(temp_post.begin(), temp_post.begin() + num_post,
+                  compare);
+
+        std::vector<std::pair<int32, BaseFloat> > *output_post = &(post[t]);
+        output_post->insert(output_post->end(),
+                            temp_post.begin(), temp_post.begin() + num_post);
+        // Prune from the small end, but never remove the last remaining entry.
+        while (output_post->size() > 1 && output_post->back().second < min_post)
+          output_post->pop_back();
+        // Now renormalize.
+        BaseFloat tot = 0.0;
+        size_t size = output_post->size();
+        for (size_t i = 0; i < size; i++)
+          tot += (*output_post)[i].second;
+        BaseFloat inv_tot = 1.0 / tot;
+        for (size_t i = 0; i < size; i++)
+          (*output_post)[i].second *= inv_tot;
+      }
+      KALDI_VLOG(1) << "Processed utterance " << utt << ", average likelihood "
+                    << (log_like_this_file / T) << " over " << T << " frames";
+      tot_like += log_like_this_file;
+      tot_t += T;
+
+      post_writer.Write(utt, post);
+      num_done++;
+    }
+
+    // Avoid printing NaN (0.0 / 0) when no frames were processed at all.
+    KALDI_LOG << "Done " << num_done << " files, " << num_err
+              << " with errors, average UBM log-likelihood is "
+              << (tot_t > 0 ? tot_like / tot_t : 0.0)
+              << " over " << tot_t << " frames.";
+
+    if (num_done != 0) return 0;
+    else return 1;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/gmmbin/gmm-gselect.cc b/src/gmmbin/gmm-gselect.cc
@@ -34,11 +34,13 @@ int main(int argc, char *argv[]) {
         " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
         " For each frame, gives a list of the n best Gaussian indices,\n"
         " sorted from best to worst.\n"
+        "See also: gmm-global-get-post, fgmm-global-gselect-to-post,\n"
+        "copy-gselect, fgmm-gselect\n"
         "Usage: \n"
         " gmm-gselect [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
         "The --gselect option (which takes an rspecifier) limits selection to a subset\n"
         "of indices:\n"
-        "e.g.: gmm-gselect \"--gselect=ark:gunzip -c bigger.gselect.gz|\" --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >1.gselect.gz\"\n";
+        "e.g.: gmm-gselect \"--gselect=ark:gunzip -c bigger.gselect.gz|\" --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >gselect.1.gz\"\n";
 
     ParseOptions po(usage);
     int32 num_gselect = 50;
@@ -64,7 +66,6 @@ int main(int argc, char *argv[]) {
     ReadKaldiObject(model_filename, &gmm);
     KALDI_ASSERT(num_gselect > 0);
     int32 num_gauss = gmm.NumGauss();
-    KALDI_ASSERT(num_gauss);
     if (num_gselect > num_gauss) {
       KALDI_WARN << "You asked for " << num_gselect << " Gaussians but GMM "
                  << "only has " << num_gauss << ", returning this many. "

diff --git a/src/hmm/posterior.cc b/src/hmm/posterior.cc
@@ -212,6 +212,18 @@ void ScalePosterior(BaseFloat scale, Posterior *post) {
   }
 }
 
+// Adds up the weight (.second) of every entry in every element of "post",
+// accumulating in double precision, and returns the result as a BaseFloat.
+BaseFloat TotalPosterior(const Posterior &post) {
+  typedef std::vector<std::pair<int32, BaseFloat> > PostElem;
+  double total = 0.0;
+  for (Posterior::const_iterator frame = post.begin();
+       frame != post.end(); ++frame) {
+    for (PostElem::const_iterator entry = frame->begin();
+         entry != frame->end(); ++entry)
+      total += entry->second;
+  }
+  return total;
+}
+
 bool PosteriorEntriesAreDisjoint(
     const std::vector<std::pair<int32,BaseFloat> > &post_elem1,
     const std::vector<std::pair<int32,BaseFloat> > &post_elem2) {

diff --git a/src/hmm/posterior.h b/src/hmm/posterior.h
@@ -126,6 +126,8 @@ typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
 /// Scales the BaseFloat (weight) element in the posterior entries.
 void ScalePosterior(BaseFloat scale, Posterior *post);
 
+/// Returns the total of all the weights in "post", i.e. the sum of the
+/// BaseFloat (.second) member over every entry of every element of "post".
+BaseFloat TotalPosterior(const Posterior &post);
 
 /// Returns true if the two lists of pairs have no common .first element.
 bool PosteriorEntriesAreDisjoint(
@@ -156,6 +158,7 @@ void AlignmentToPosterior(const std::vector<int32> &ali,
 void SortPosteriorByPdfs(const TransitionModel &tmodel,
                          Posterior *post);
 
+
 /// Converts a posterior over transition-ids to be a posterior
 /// over pdf-ids.
 void ConvertPosteriorToPdfs(const TransitionModel &tmodel,

diff --git a/src/ivector/ivector-extractor-test.cc b/src/ivector/ivector-extractor-test.cc
@@ -113,11 +113,11 @@ void TestIvectorExtraction(const IvectorExtractor &extractor,
 
   extractor.GetIvectorDistribution(utt_stats, &ivector1, NULL);
 
-  online_stats.GetIvector(&ivector2);
+  online_stats.GetIvector(-1, &ivector2);
 
   KALDI_ASSERT(ivector1.ApproxEqual(ivector2));
 }
-  
+
 
 void UnitTestIvectorExtractor() {
   FullGmm fgmm;