sandbox/online: merging changes from trunk

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4141 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
jtrmal · Jul 13, 2014 · 0291ad2 · 0291ad2
2 parents c4c4cbc + 78dfd1f
commit 0291ad2
Show file tree

Hide file tree

Showing 14 changed files with 386 additions and 108 deletions.
diff --git a/src/Makefile b/src/Makefile
@@ -92,7 +92,7 @@ ext: test_dependencies ext_depend $(SUBDIRS) $(EXT_SUBDIRS)
 # delete or comment out the lines below.
 OPENFST_VER = $(shell grep 'PACKAGE_VERSION' ../tools/openfst/Makefile | sed -e 's:.*= ::')
 test_dependencies:
-	@[ "$(OPENFST_VER)" == '1.2.10' ] || [ "$(OPENFST_VER)" == '1.3.2' ] || [ "$(OPENFST_VER)" == '1.3.4' ] || { echo "You now need openfst-1.2.10.  Do: cd ../tools; svn update; ./install.sh; cd ../src; make depend; make"; exit 1; };
+	[ "$(OPENFST_VER)" == '1.3.2' ] || [ "$(OPENFST_VER)" == '1.3.3' ] || [ "$(OPENFST_VER)" == '1.3.4' ] || { echo "You now need openfst-1.3.2 or later. cd ../tools; svn update; ./install.sh; cd ../src; make depend; make"; exit 1; };
 
 check_portaudio:
 	@[ -d ../tools/portaudio ] || ( cd ../tools;  ./install_portaudio.sh )

diff --git a/src/feat/feature-fbank.cc b/src/feat/feature-fbank.cc
@@ -31,6 +31,11 @@ Fbank::Fbank(const FbankOptions &opts)
   int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
   if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
     srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
+
+  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
+  // [note: this call caches it.]  The reason we call this here is to
+  // improve the efficiency of the "const" version of Compute().
+  GetMelBanks(1.0);
 }
 
 Fbank::~Fbank() {
@@ -56,10 +61,50 @@ const MelBanks *Fbank::GetMelBanks(BaseFloat vtln_warp) {
   return this_mel_banks;
 }
 
+const MelBanks *Fbank::GetMelBanks(BaseFloat vtln_warp,  // const overload: looks up mel_banks_ with find() and never inserts into it
+                                   bool *must_delete) const {  // *must_delete is written on both branches below
+  MelBanks *this_mel_banks = NULL;
+  std::map<BaseFloat, MelBanks*>::const_iterator iter =
+      mel_banks_.find(vtln_warp);
+  if (iter == mel_banks_.end()) {  // no entry stored for this warp factor
+    this_mel_banks = new MelBanks(opts_.mel_opts,  // freshly allocated object; it is not added to mel_banks_
+                                  opts_.frame_opts,
+                                  vtln_warp);
+    *must_delete = true;  // tells the caller it has to delete the returned pointer
+  } else {
+    this_mel_banks = iter->second;  // reuse the object already held in mel_banks_
+    *must_delete = false;  // the returned pointer is still referenced by mel_banks_, so the caller must not delete it
+  }
+  return this_mel_banks;
+}
+
 void Fbank::Compute(const VectorBase<BaseFloat> &wave,
-                   BaseFloat vtln_warp,
-                   Matrix<BaseFloat> *output,
-                   Vector<BaseFloat> *wave_remainder) {
+                    BaseFloat vtln_warp,
+                    Matrix<BaseFloat> *output,
+                    Vector<BaseFloat> *wave_remainder) {
+  const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);  // single-argument (non-const) overload; no must_delete flag, so nothing is freed in this function
+  ComputeInternal(wave, *this_mel_banks, output, wave_remainder);  // same helper that the const Compute() overload calls
+}
+
+void Fbank::Compute(const VectorBase<BaseFloat> &wave,  // const overload: obtains the filterbanks through the two-argument GetMelBanks()
+                    BaseFloat vtln_warp,
+                    Matrix<BaseFloat> *output,
+                    Vector<BaseFloat> *wave_remainder) const {
+  bool must_delete_mel_banks;  // left uninitialized here; assigned by the GetMelBanks() call that follows
+  const MelBanks *mel_banks = GetMelBanks(vtln_warp,
+                                          &must_delete_mel_banks);
+
+  ComputeInternal(wave, *mel_banks, output, wave_remainder);  // NOTE(review): the delete below only runs if this call returns normally; if it can throw, a temporary mel_banks would leak - confirm
+
+  if (must_delete_mel_banks)  // true only when GetMelBanks() allocated a new object for this call
+    delete mel_banks;
+}
+
+
+void Fbank::ComputeInternal(const VectorBase<BaseFloat> &wave,
+                            const MelBanks &mel_banks,
+                            Matrix<BaseFloat> *output,
+                            Vector<BaseFloat> *wave_remainder) const {
   KALDI_ASSERT(output != NULL);
 
   // Get dimensions of output features
@@ -77,6 +122,7 @@ void Fbank::Compute(const VectorBase<BaseFloat> &wave,
   // Buffers
   Vector<BaseFloat> window;  // windowed waveform.
   Vector<BaseFloat> mel_energies;
+  std::vector<BaseFloat> temp_buffer;  // used by srfft.  
   BaseFloat log_energy;
 
   // Compute all the frames, r is frame index..
@@ -90,17 +136,16 @@ void Fbank::Compute(const VectorBase<BaseFloat> &wave,
       log_energy = log(VecVec(window, window));
 
     if (srfft_ != NULL)  // Compute FFT using split-radix algorithm.
-      srfft_->Compute(window.Data(), true);
+      srfft_->Compute(window.Data(), true, &temp_buffer);
     else  // An alternative algorithm that works for non-powers-of-two.
       RealFft(&window, true);
 
     // Convert the FFT into a power spectrum.
     ComputePowerSpectrum(&window);
     SubVector<BaseFloat> power_spectrum(window, 0, window.Dim()/2 + 1);
 
-    // Integrate with MelFiterbank over power spectrum
-    const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
-    this_mel_banks->Compute(power_spectrum, &mel_energies);
+    // Sum with MelFilterbank over power spectrum
+    mel_banks.Compute(power_spectrum, &mel_energies);
     if (opts_.use_log_fbank)
       mel_energies.ApplyLog();  // take the log.
 

diff --git a/src/feat/feature-fbank.h b/src/feat/feature-fbank.h
@@ -82,16 +82,34 @@ class Fbank {
 
   int32 Dim() const { return opts_.mel_opts.num_bins; }
 
-  /// Will throw exception on failure (e.g. if file too short for
-  /// even one frame).
+  /// Will throw exception on failure (e.g. if file too short for even one
+  /// frame).  The output "wave_remainder" is the last frame or two of the
+  /// waveform that it would be necessary to include in the next call to Compute
+  /// for the same utterance.  It is not exactly the un-processed part (it may
+  /// have been partly processed), it's the start of the next window that we
+  /// have not already processed.
   void Compute(const VectorBase<BaseFloat> &wave,
                BaseFloat vtln_warp,
                Matrix<BaseFloat> *output,
                Vector<BaseFloat> *wave_remainder = NULL);
-
+
+  /// Const version of Compute()
+  void Compute(const VectorBase<BaseFloat> &wave,
+               BaseFloat vtln_warp,
+               Matrix<BaseFloat> *output,
+               Vector<BaseFloat> *wave_remainder = NULL) const;
   typedef FbankOptions Options;
  private:
+  void ComputeInternal(const VectorBase<BaseFloat> &wave,
+                       const MelBanks &mel_banks,
+                       Matrix<BaseFloat> *output,
+                       Vector<BaseFloat> *wave_remainder = NULL) const;
+
   const MelBanks *GetMelBanks(BaseFloat vtln_warp);
+
+  const MelBanks *GetMelBanks(BaseFloat vtln_warp,
+                              bool *must_delete) const;
+
   FbankOptions opts_;
   BaseFloat log_energy_floor_;
   std::map<BaseFloat, MelBanks*> mel_banks_;  // BaseFloat is VTLN coefficient.

diff --git a/src/feat/feature-mfcc.cc b/src/feat/feature-mfcc.cc
@@ -44,6 +44,11 @@ Mfcc::Mfcc(const MfccOptions &opts)
   int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
   if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
     srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
+
+  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
+  // [note: this call caches it.]  The reason we call this here is to
+  // improve the efficiency of the "const" version of Compute().
+  GetMelBanks(1.0);
 }
 
 Mfcc::~Mfcc() {
@@ -69,10 +74,50 @@ const MelBanks *Mfcc::GetMelBanks(BaseFloat vtln_warp) {
   return this_mel_banks;
 }
 
+
+const MelBanks *Mfcc::GetMelBanks(BaseFloat vtln_warp, bool *must_delete) const {  // const overload: looks up mel_banks_ with find() and never inserts; *must_delete is written on both branches
+  MelBanks *this_mel_banks = NULL;
+  std::map<BaseFloat, MelBanks*>::const_iterator iter =
+      mel_banks_.find(vtln_warp);
+  if (iter == mel_banks_.end()) {  // no entry stored for this warp factor
+    this_mel_banks = new MelBanks(opts_.mel_opts,  // freshly allocated object; it is not added to mel_banks_
+                                  opts_.frame_opts,
+                                  vtln_warp);
+    *must_delete = true;  // tells the caller it has to delete the returned pointer
+  } else {
+    this_mel_banks = iter->second;  // reuse the object already held in mel_banks_
+    *must_delete = false;  // the returned pointer is still referenced by mel_banks_, so the caller must not delete it
+  }
+  return this_mel_banks;
+}
+
+
 void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
                    BaseFloat vtln_warp,
                    Matrix<BaseFloat> *output,
                    Vector<BaseFloat> *wave_remainder) {
+  const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);  // single-argument (non-const) overload; no must_delete flag, so nothing is freed in this function
+  ComputeInternal(wave, *this_mel_banks, output, wave_remainder);  // same helper that the const Compute() overload calls
+}
+
+void Mfcc::Compute(const VectorBase<BaseFloat> &wave,  // const overload: obtains the filterbanks through the two-argument GetMelBanks()
+                   BaseFloat vtln_warp,
+                   Matrix<BaseFloat> *output,
+                   Vector<BaseFloat> *wave_remainder) const {
+  bool must_delete_mel_banks;  // left uninitialized here; assigned by the GetMelBanks() call that follows
+  const MelBanks *mel_banks = GetMelBanks(vtln_warp,
+                                               &must_delete_mel_banks);
+
+  ComputeInternal(wave, *mel_banks, output, wave_remainder);  // NOTE(review): the delete below only runs if this call returns normally; if it can throw, a temporary mel_banks would leak - confirm
+
+  if (must_delete_mel_banks)  // true only when GetMelBanks() allocated a new object for this call
+    delete mel_banks;
+}
+
+void Mfcc::ComputeInternal(const VectorBase<BaseFloat> &wave,
+                           const MelBanks &mel_banks,
+                           Matrix<BaseFloat> *output,
+                           Vector<BaseFloat> *wave_remainder) const {
   KALDI_ASSERT(output != NULL);
   int32 rows_out = NumFrames(wave.Dim(), opts_.frame_opts),
       cols_out = opts_.num_ceps;
@@ -86,6 +131,7 @@ void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
     ExtractWaveformRemainder(wave, opts_.frame_opts, wave_remainder);
   Vector<BaseFloat> window;  // windowed waveform.
   Vector<BaseFloat> mel_energies;
+  std::vector<BaseFloat> temp_buffer;  // used by srfft.
   for (int32 r = 0; r < rows_out; r++) {  // r is frame index..
     BaseFloat log_energy;
     ExtractWindow(wave, r, opts_.frame_opts, feature_window_function_, &window,
@@ -95,17 +141,16 @@ void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
       log_energy = log(VecVec(window, window));
 
     if (srfft_ != NULL)  // Compute FFT using the split-radix algorithm.
-      srfft_->Compute(window.Data(), true);
+      srfft_->Compute(window.Data(), true, &temp_buffer);
     else  // An alternative algorithm that works for non-powers-of-two.
       RealFft(&window, true);
 
     // Convert the FFT into a power spectrum.
     ComputePowerSpectrum(&window);
     SubVector<BaseFloat> power_spectrum(window, 0, window.Dim()/2 + 1);
 
-    const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
-    this_mel_banks->Compute(power_spectrum, &mel_energies);
-
+    mel_banks.Compute(power_spectrum, &mel_energies);
+
     mel_energies.ApplyLog();  // take the log.
 
     SubVector<BaseFloat> this_mfcc(output->Row(r));

diff --git a/src/feat/feature-mfcc.h b/src/feat/feature-mfcc.h
@@ -93,16 +93,30 @@ class Mfcc {
   /// waveform that it would be necessary to include in the next call to Compute
   /// for the same utterance.  It is not exactly the un-processed part (it may
   /// have been partly processed), it's the start of the next window that we
-  /// have not already processed.  Will throw exception on failure (e.g. if file
-  /// too short for even one frame).
+  /// have not already processed.
   void Compute(const VectorBase<BaseFloat> &wave,
                BaseFloat vtln_warp,
                Matrix<BaseFloat> *output,
                Vector<BaseFloat> *wave_remainder = NULL);
 
   typedef MfccOptions Options;
+  /// Const version of Compute()
+  void Compute(const VectorBase<BaseFloat> &wave,
+               BaseFloat vtln_warp,
+               Matrix<BaseFloat> *output,
+               Vector<BaseFloat> *wave_remainder = NULL) const;
+
  private:
+  void ComputeInternal(const VectorBase<BaseFloat> &wave,
+                       const MelBanks &mel_banks,
+                       Matrix<BaseFloat> *output,
+                       Vector<BaseFloat> *wave_remainder = NULL) const;
+
   const MelBanks *GetMelBanks(BaseFloat vtln_warp);
+
+  const MelBanks *GetMelBanks(BaseFloat vtln_warp,
+                              bool *must_delete) const;
+
   MfccOptions opts_;
   Vector<BaseFloat> lifter_coeffs_;
   Matrix<BaseFloat> dct_matrix_;  // matrix we left-multiply by to perform DCT.