Skip to content

Commit

Permalink
sandbox/online: merging changes from trunk
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4141 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
  • Loading branch information
danpovey committed Jul 13, 2014
2 parents c4c4cbc + 78dfd1f commit 0291ad2
Show file tree
Hide file tree
Showing 14 changed files with 386 additions and 108 deletions.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ ext: test_dependencies ext_depend $(SUBDIRS) $(EXT_SUBDIRS)
# delete or comment out the lines below.
OPENFST_VER = $(shell grep 'PACKAGE_VERSION' ../tools/openfst/Makefile | sed -e 's:.*= ::')
test_dependencies:
@[ "$(OPENFST_VER)" == '1.2.10' ] || [ "$(OPENFST_VER)" == '1.3.2' ] || [ "$(OPENFST_VER)" == '1.3.4' ] || { echo "You now need openfst-1.2.10. Do: cd ../tools; svn update; ./install.sh; cd ../src; make depend; make"; exit 1; };
[ "$(OPENFST_VER)" == '1.3.2' ] || [ "$(OPENFST_VER)" == '1.3.3' ] || [ "$(OPENFST_VER)" == '1.3.4' ] || { echo "You now need openfst-1.3.2 or later. cd ../tools; svn update; ./install.sh; cd ../src; make depend; make"; exit 1; };

check_portaudio:
@[ -d ../tools/portaudio ] || ( cd ../tools; ./install_portaudio.sh )
Expand Down
59 changes: 52 additions & 7 deletions src/feat/feature-fbank.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ Fbank::Fbank(const FbankOptions &opts)
int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);

// We'll definitely need the filterbanks info for VTLN warping factor 1.0.
// [note: this call caches it.] The reason we call this here is to
// improve the efficiency of the "const" version of Compute().
GetMelBanks(1.0);
}

Fbank::~Fbank() {
Expand All @@ -56,10 +61,50 @@ const MelBanks *Fbank::GetMelBanks(BaseFloat vtln_warp) {
return this_mel_banks;
}

// Const overload of GetMelBanks().  Looks up the MelBanks object for the
// given VTLN warp factor in mel_banks_ without modifying the map.
//  - If an entry exists, returns it and sets *must_delete to false.
//  - Otherwise allocates a new MelBanks from opts_ and vtln_warp, sets
//    *must_delete to true, and returns it; the caller is then expected to
//    delete the returned pointer when done with it.
const MelBanks *Fbank::GetMelBanks(BaseFloat vtln_warp,
                                   bool *must_delete) const {
  std::map<BaseFloat, MelBanks*>::const_iterator pos =
      mel_banks_.find(vtln_warp);
  if (pos != mel_banks_.end()) {
    // Found an existing entry for this warp factor; hand it back as-is.
    *must_delete = false;
    return pos->second;
  }
  // No entry for this warp factor, and this const method cannot add one to
  // mel_banks_, so build a temporary object for the caller.
  MelBanks *fresh_mel_banks = new MelBanks(opts_.mel_opts,
                                           opts_.frame_opts,
                                           vtln_warp);
  *must_delete = true;
  return fresh_mel_banks;
}

void Fbank::Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder) {
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder) {
const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
ComputeInternal(wave, *this_mel_banks, output, wave_remainder);
}

// Const version of Compute().  Obtains the MelBanks for "vtln_warp" through
// the const GetMelBanks() overload, which may hand back a freshly allocated
// object that this function has to delete, and then runs ComputeInternal().
//
//  wave            input waveform.
//  vtln_warp       VTLN warp factor used to select/build the mel banks.
//  output          receives the computed features (must be non-NULL; this is
//                  asserted in ComputeInternal()).
//  wave_remainder  passed through unchanged to ComputeInternal().
//
// Any exception raised by ComputeInternal() is propagated to the caller
// after the temporary MelBanks (if one was allocated) has been freed, so the
// error path does not leak.
void Fbank::Compute(const VectorBase<BaseFloat> &wave,
                    BaseFloat vtln_warp,
                    Matrix<BaseFloat> *output,
                    Vector<BaseFloat> *wave_remainder) const {
  // Initialized defensively; GetMelBanks() sets it on every path.
  bool must_delete_mel_banks = false;
  const MelBanks *mel_banks = GetMelBanks(vtln_warp,
                                          &must_delete_mel_banks);

  try {
    ComputeInternal(wave, *mel_banks, output, wave_remainder);
  } catch (...) {
    // Release the temporary before rethrowing so it is not leaked.
    if (must_delete_mel_banks)
      delete mel_banks;
    throw;
  }

  if (must_delete_mel_banks)
    delete mel_banks;
}


void Fbank::ComputeInternal(const VectorBase<BaseFloat> &wave,
const MelBanks &mel_banks,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder) const {
KALDI_ASSERT(output != NULL);

// Get dimensions of output features
Expand All @@ -77,6 +122,7 @@ void Fbank::Compute(const VectorBase<BaseFloat> &wave,
// Buffers
Vector<BaseFloat> window; // windowed waveform.
Vector<BaseFloat> mel_energies;
std::vector<BaseFloat> temp_buffer; // used by srfft.
BaseFloat log_energy;

// Compute all the frames, r is frame index..
Expand All @@ -90,17 +136,16 @@ void Fbank::Compute(const VectorBase<BaseFloat> &wave,
log_energy = log(VecVec(window, window));

if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
srfft_->Compute(window.Data(), true);
srfft_->Compute(window.Data(), true, &temp_buffer);
else // An alternative algorithm that works for non-powers-of-two.
RealFft(&window, true);

// Convert the FFT into a power spectrum.
ComputePowerSpectrum(&window);
SubVector<BaseFloat> power_spectrum(window, 0, window.Dim()/2 + 1);

// Integrate with MelFiterbank over power spectrum
const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
this_mel_banks->Compute(power_spectrum, &mel_energies);
// Sum with MelFilterbank over power spectrum
mel_banks.Compute(power_spectrum, &mel_energies);
if (opts_.use_log_fbank)
mel_energies.ApplyLog(); // take the log.

Expand Down
24 changes: 21 additions & 3 deletions src/feat/feature-fbank.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,34 @@ class Fbank {

int32 Dim() const { return opts_.mel_opts.num_bins; }

/// Will throw exception on failure (e.g. if file too short for
/// even one frame).
/// Will throw exception on failure (e.g. if file too short for even one
/// frame). The output "wave_remainder" is the last frame or two of the
/// waveform that it would be necessary to include in the next call to Compute
/// for the same utterance. It is not exactly the un-processed part (it may
/// have been partly processed), it's the start of the next window that we
/// have not already processed.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL);


/// Const version of Compute()
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL) const;
typedef FbankOptions Options;
private:
void ComputeInternal(const VectorBase<BaseFloat> &wave,
const MelBanks &mel_banks,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL) const;

const MelBanks *GetMelBanks(BaseFloat vtln_warp);

const MelBanks *GetMelBanks(BaseFloat vtln_warp,
bool *must_delete) const;

FbankOptions opts_;
BaseFloat log_energy_floor_;
std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
Expand Down
53 changes: 49 additions & 4 deletions src/feat/feature-mfcc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ Mfcc::Mfcc(const MfccOptions &opts)
int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);

// We'll definitely need the filterbanks info for VTLN warping factor 1.0.
// [note: this call caches it.] The reason we call this here is to
// improve the efficiency of the "const" version of Compute().
GetMelBanks(1.0);
}

Mfcc::~Mfcc() {
Expand All @@ -69,10 +74,50 @@ const MelBanks *Mfcc::GetMelBanks(BaseFloat vtln_warp) {
return this_mel_banks;
}


// Const overload of GetMelBanks().  Looks up the MelBanks object for the
// given VTLN warp factor in mel_banks_ without modifying the map.
//  - If an entry exists, returns it and sets *must_delete to false.
//  - Otherwise allocates a new MelBanks from opts_ and vtln_warp, sets
//    *must_delete to true, and returns it; the caller is then expected to
//    delete the returned pointer when done with it.
const MelBanks *Mfcc::GetMelBanks(BaseFloat vtln_warp, bool *must_delete) const {
  std::map<BaseFloat, MelBanks*>::const_iterator pos =
      mel_banks_.find(vtln_warp);
  if (pos != mel_banks_.end()) {
    // Found an existing entry for this warp factor; hand it back as-is.
    *must_delete = false;
    return pos->second;
  }
  // No entry for this warp factor, and this const method cannot add one to
  // mel_banks_, so build a temporary object for the caller.
  MelBanks *fresh_mel_banks = new MelBanks(opts_.mel_opts,
                                           opts_.frame_opts,
                                           vtln_warp);
  *must_delete = true;
  return fresh_mel_banks;
}


void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder) {
const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
ComputeInternal(wave, *this_mel_banks, output, wave_remainder);
}

// Const version of Compute().  Obtains the MelBanks for "vtln_warp" through
// the const GetMelBanks() overload, which may hand back a freshly allocated
// object that this function has to delete, and then runs ComputeInternal().
//
//  wave            input waveform.
//  vtln_warp       VTLN warp factor used to select/build the mel banks.
//  output          receives the computed features (must be non-NULL; this is
//                  asserted in ComputeInternal()).
//  wave_remainder  passed through unchanged to ComputeInternal().
//
// If ComputeInternal() throws, the exception is propagated to the caller
// after the temporary MelBanks (if one was allocated) has been freed, so the
// error path does not leak.
void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
                   BaseFloat vtln_warp,
                   Matrix<BaseFloat> *output,
                   Vector<BaseFloat> *wave_remainder) const {
  // Initialized defensively; GetMelBanks() sets it on every path.
  bool must_delete_mel_banks = false;
  const MelBanks *mel_banks = GetMelBanks(vtln_warp,
                                          &must_delete_mel_banks);

  try {
    ComputeInternal(wave, *mel_banks, output, wave_remainder);
  } catch (...) {
    // Release the temporary before rethrowing so it is not leaked.
    if (must_delete_mel_banks)
      delete mel_banks;
    throw;
  }

  if (must_delete_mel_banks)
    delete mel_banks;
}

void Mfcc::ComputeInternal(const VectorBase<BaseFloat> &wave,
const MelBanks &mel_banks,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder) const {
KALDI_ASSERT(output != NULL);
int32 rows_out = NumFrames(wave.Dim(), opts_.frame_opts),
cols_out = opts_.num_ceps;
Expand All @@ -86,6 +131,7 @@ void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
ExtractWaveformRemainder(wave, opts_.frame_opts, wave_remainder);
Vector<BaseFloat> window; // windowed waveform.
Vector<BaseFloat> mel_energies;
std::vector<BaseFloat> temp_buffer; // used by srfft.
for (int32 r = 0; r < rows_out; r++) { // r is frame index..
BaseFloat log_energy;
ExtractWindow(wave, r, opts_.frame_opts, feature_window_function_, &window,
Expand All @@ -95,17 +141,16 @@ void Mfcc::Compute(const VectorBase<BaseFloat> &wave,
log_energy = log(VecVec(window, window));

if (srfft_ != NULL) // Compute FFT using the split-radix algorithm.
srfft_->Compute(window.Data(), true);
srfft_->Compute(window.Data(), true, &temp_buffer);
else // An alternative algorithm that works for non-powers-of-two.
RealFft(&window, true);

// Convert the FFT into a power spectrum.
ComputePowerSpectrum(&window);
SubVector<BaseFloat> power_spectrum(window, 0, window.Dim()/2 + 1);

const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
this_mel_banks->Compute(power_spectrum, &mel_energies);

mel_banks.Compute(power_spectrum, &mel_energies);

mel_energies.ApplyLog(); // take the log.

SubVector<BaseFloat> this_mfcc(output->Row(r));
Expand Down
18 changes: 16 additions & 2 deletions src/feat/feature-mfcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,30 @@ class Mfcc {
/// waveform that it would be necessary to include in the next call to Compute
/// for the same utterance. It is not exactly the un-processed part (it may
/// have been partly processed), it's the start of the next window that we
/// have not already processed. Will throw exception on failure (e.g. if file
/// too short for even one frame).
/// have not already processed.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL);

typedef MfccOptions Options;
/// Const version of Compute()
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL) const;

private:
void ComputeInternal(const VectorBase<BaseFloat> &wave,
const MelBanks &mel_banks,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL) const;

const MelBanks *GetMelBanks(BaseFloat vtln_warp);

const MelBanks *GetMelBanks(BaseFloat vtln_warp,
bool *must_delete) const;

MfccOptions opts_;
Vector<BaseFloat> lifter_coeffs_;
Matrix<BaseFloat> dct_matrix_; // matrix we left-multiply by to perform DCT.
Expand Down
Loading

0 comments on commit 0291ad2

Please sign in to comment.