Skip to content

Commit

Permalink
add plp
Browse files Browse the repository at this point in the history
  • Loading branch information
takenori-y committed Jan 8, 2024
1 parent 629fe76 commit 97438b4
Show file tree
Hide file tree
Showing 11 changed files with 895 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ set(CC_SOURCES
${SOURCE_DIR}/analysis/mel_filter_bank_analysis.cc
${SOURCE_DIR}/analysis/mel_frequency_cepstral_coefficients_analysis.cc
${SOURCE_DIR}/analysis/mel_generalized_cepstral_analysis.cc
${SOURCE_DIR}/analysis/perceptual_linear_predictive_coefficients_analysis.cc
${SOURCE_DIR}/analysis/pitch_extraction.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_dio.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_harvest.cc
Expand Down Expand Up @@ -177,7 +178,9 @@ set(CC_SOURCES
${SOURCE_DIR}/math/gaussian_mixture_modeling.cc
${SOURCE_DIR}/math/histogram_calculation.cc
${SOURCE_DIR}/math/inverse_discrete_cosine_transform.cc
${SOURCE_DIR}/math/inverse_discrete_fourier_transform.cc
${SOURCE_DIR}/math/inverse_fast_fourier_transform.cc
${SOURCE_DIR}/math/inverse_fourier_transform.cc
${SOURCE_DIR}/math/levinson_durbin_recursion.cc
${SOURCE_DIR}/math/matrix.cc
${SOURCE_DIR}/math/matrix2d.cc
Expand Down Expand Up @@ -352,6 +355,7 @@ set(MAIN_SOURCES
${SOURCE_DIR}/main/pca.cc
${SOURCE_DIR}/main/pcas.cc
${SOURCE_DIR}/main/phase.cc
${SOURCE_DIR}/main/plp.cc
${SOURCE_DIR}/main/pitch.cc
${SOURCE_DIR}/main/pitch2sin.cc
${SOURCE_DIR}/main/pitch_mark.cc
Expand Down
2 changes: 1 addition & 1 deletion doc/main/mfcc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mfcc

.. doxygenfile:: mfcc.cc

.. seealso:: :ref:`fbank`
.. seealso:: :ref:`fbank` :ref:`plp`

.. doxygenclass:: sptk::MelFrequencyCepstralCoefficientsAnalysis
:members:
11 changes: 11 additions & 0 deletions doc/main/plp.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _plp:

plp
===

.. doxygenfile:: plp.cc

.. seealso:: :ref:`fbank` :ref:`mfcc`

.. doxygenclass:: sptk::PerceptualLinearPredictiveCoefficientsAnalysis
:members:
6 changes: 6 additions & 0 deletions include/SPTK/analysis/mel_filter_bank_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ class MelFilterBankAnalysis {
return is_valid_;
}

/**
* @param[out] center_frequencies @f$(C+1)@f$ center frequencies in Hz.
* @return True on success, false on failure.
*/
bool GetCenterFrequencies(std::vector<double>* center_frequencies) const;

/**
* @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
* @param[out] filter_bank_output @f$C@f$-channel filter-bank outputs.
Expand All @@ -123,6 +128,7 @@ class MelFilterBankAnalysis {

int lower_bin_index_;
int upper_bin_index_;
std::vector<double> center_frequencies_;
std::vector<int> channel_indices_;
std::vector<double> channel_weights_;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// ------------------------------------------------------------------------ //
// Copyright 2021 SPTK Working Group //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
#define SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_

#include <vector> // std::vector

#include "SPTK/analysis/mel_filter_bank_analysis.h"
#include "SPTK/conversion/linear_predictive_coefficients_to_cepstrum.h"
#include "SPTK/math/inverse_fourier_transform.h"
#include "SPTK/math/levinson_durbin_recursion.h"
#include "SPTK/utils/sptk_utils.h"

namespace sptk {

/**
* Perform perceptual linear predictive (PLP) coefficients analysis.
*
* The input is the half part of power spectrum:
* @f[
* \begin{array}{cccc}
* |X(0)|^2, & |X(1)|^2, & \ldots, & |X(N/2)|^2,
* \end{array}
* @f]
* where @f$N@f$ is the FFT length. The outputs are the @f$M@f$-th order PLP
* features with the zeroth cepstral parameter:
* @f[
* \begin{array}{ccccc}
* c(0), & \bar{c}(1), & \bar{c}(2), & \ldots, & \bar{c}(M)
* \end{array}
* @f]
* and the log-signal energy @f$E@f$.
*
* [1] S. Young et al., "The HTK book," Cambridge University
* Engineering Department, 2006.
*/
class PerceptualLinearPredictiveCoefficientsAnalysis {
public:
/**
* Buffer for PerceptualLinearPredictiveCoefficientsAnalysis class.
*
* Every member is private; only the enclosing analysis class, declared as a
* friend below, can access the storage. A pointer to an instance is passed
* to Run().
*/
class Buffer {
public:
Buffer() {
}

virtual ~Buffer() {
}

private:
// Intermediate vectors named after the quantities they hold.
std::vector<double> filter_bank_output_;
std::vector<double> spectrum_;
std::vector<double> cepstrum_;

// Real/imaginary input and output vectors.
// NOTE(review): presumably consumed by the inverse Fourier transform member
// of the enclosing class; the implementation is not visible here.
std::vector<double> real_part_input_;
std::vector<double> real_part_output_;
std::vector<double> imag_part_input_;
std::vector<double> imag_part_output_;

// Nested buffer of the type declared by LevinsonDurbinRecursion.
LevinsonDurbinRecursion::Buffer buffer_for_levinson_durbin_recursion_;

friend class PerceptualLinearPredictiveCoefficientsAnalysis;
DISALLOW_COPY_AND_ASSIGN(Buffer);
};

/**
* @param[in] fft_length Number of FFT bins, @f$N@f$.
* @param[in] num_channel Number of channels, @f$C@f$.
* @param[in] num_order Order of cepstral coefficients, @f$M@f$.
* @param[in] liftering_coefficient A parameter of liftering, @f$L@f$.
* @param[in] sampling_rate Sampling rate in Hz.
* @param[in] lowest_frequency Lowest frequency in Hz.
* @param[in] highest_frequency Highest frequency in Hz.
* @param[in] floor Floor value of raw filter-bank output.
* @param[in] compression_factor Amplitude compression factor.
*/
PerceptualLinearPredictiveCoefficientsAnalysis(
int fft_length, int num_channel, int num_order, int liftering_coefficient,
double sampling_rate, double lowest_frequency, double highest_frequency,
double floor, double compression_factor);

virtual ~PerceptualLinearPredictiveCoefficientsAnalysis() {
}

/**
* @return FFT size, as reported by the internal mel-filter-bank analysis.
*/
int GetFftLength() const {
return mel_filter_bank_analysis_.GetFftLength();
}

/**
* @return Number of channels, as reported by the internal mel-filter-bank
* analysis.
*/
int GetNumChannel() const {
return mel_filter_bank_analysis_.GetNumChannel();
}

/**
* @return Order of cepstral coefficients, as reported by the internal
* Levinson-Durbin recursion.
*/
int GetNumOrder() const {
return levinson_durbin_recursion_.GetNumOrder();
}

/**
* @return Liftering coefficient.
*/
int GetLifteringCoefficient() const {
return liftering_coefficient_;
}

/**
* @return Compression factor.
*/
double GetCompressionFactor() const {
return compression_factor_;
}

/**
* @return True if this object is valid.
*/
bool IsValid() const {
return is_valid_;
}

/**
* @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
* @param[out] plp @f$M@f$-th order PLP features.
* @param[out] energy Signal energy @f$E@f$ (optional).
* @param[out] buffer Buffer.
* @return True on success, false on failure.
*
* NOTE(review): "optional" presumably means a null pointer is accepted for
* @c energy; confirm against the implementation.
*/
bool Run(
const std::vector<double>& power_spectrum, std::vector<double>* plp,
double* energy,
PerceptualLinearPredictiveCoefficientsAnalysis::Buffer* buffer) const;

private:
// Values returned by GetLifteringCoefficient() and GetCompressionFactor().
const int liftering_coefficient_;
const double compression_factor_;

// Processing stages owned by this object. Non-static members are initialized
// in declaration order, so keep this order in sync with the constructor.
const MelFilterBankAnalysis mel_filter_bank_analysis_;
const InverseFourierTransform inverse_fourier_transform_;
const LevinsonDurbinRecursion levinson_durbin_recursion_;
const LinearPredictiveCoefficientsToCepstrum
linear_predictive_coefficients_to_cepstrum_;

// Flag returned by IsValid().
bool is_valid_;

// NOTE(review): presumably filled in by the constructor; the implementation
// file is not visible here.
std::vector<double> equal_loudness_curve_;
// NOTE(review): "cepstal" looks like a typo for "cepstral"; renaming requires
// updating the implementation file as well.
std::vector<double> cepstal_weights_;

DISALLOW_COPY_AND_ASSIGN(PerceptualLinearPredictiveCoefficientsAnalysis);
};

} // namespace sptk

#endif // SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
31 changes: 26 additions & 5 deletions src/analysis/mel_filter_bank_analysis.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,19 @@
#include "SPTK/analysis/mel_filter_bank_analysis.h"

#include <algorithm> // std::fill, std::max, std::min
#include <cmath> // std::log, std::sqrt
#include <cmath> // std::exp, std::log, std::sqrt
#include <cstddef> // std::size_t
#include <numeric> // std::accumulate

namespace {

// Converts a frequency in Hz to the mel scale: mel = 1127 * ln(hz / 700 + 1).
// Note that HTK uses 1127 instead of 1127.01048.
double HzToMel(double hz) {
  // return 1127.01048 * std::log(hz / 700.0 + 1.0);
  return 1127.0 * std::log(hz / 700.0 + 1.0);
}

// Converts a mel-scale value back to Hz: hz = 700 * (exp(mel / 1127) - 1).
// Uses the same 1127 constant as HzToMel so the two are inverses.
double MelToHz(double mel) {
  const double ratio(std::exp(mel / 1127.0));
  return (ratio - 1.0) * 700.0;
}

double SampleMel(int index, int fft_length, double sampling_rate) {
Expand Down Expand Up @@ -74,8 +77,8 @@ MelFilterBankAnalysis::MelFilterBankAnalysis(int fft_length, int num_channel,
const double mel_high(HzToMel(highest_frequency));

// Create vector of filter-bank center frequencies.
std::vector<double> center_frequencies(num_channel_ + 1);
double* cf(&(center_frequencies[0]));
center_frequencies_.resize(num_channel_ + 1);
double* cf(&(center_frequencies_[0]));
{
const double diff(mel_high - mel_low);
for (int m(0); m <= num_channel_; ++m) {
Expand Down Expand Up @@ -108,6 +111,24 @@ MelFilterBankAnalysis::MelFilterBankAnalysis(int fft_length, int num_channel,
}
}

// Writes the stored filter-bank centers, converted with MelToHz(), into
// center_frequencies. The output is resized to num_channel_ + 1 elements when
// its size differs. Returns false when this object is invalid or the output
// pointer is NULL, true otherwise.
bool MelFilterBankAnalysis::GetCenterFrequencies(
    std::vector<double>* center_frequencies) const {
  if (NULL == center_frequencies || !is_valid_) {
    return false;
  }

  std::vector<double>& output(*center_frequencies);
  const std::size_t num_points(static_cast<std::size_t>(num_channel_ + 1));
  if (num_points != output.size()) {
    output.resize(num_points);
  }

  // One output entry per stored center value.
  for (std::size_t i(0); i < num_points; ++i) {
    output[i] = MelToHz(center_frequencies_[i]);
  }

  return true;
}

bool MelFilterBankAnalysis::Run(const std::vector<double>& power_spectrum,
std::vector<double>* filter_bank_output,
double* energy) const {
Expand Down
Loading

0 comments on commit 97438b4

Please sign in to comment.