Skip to content

Commit

Permalink
Merge pull request #55 from sp-nitech/plp
Browse files Browse the repository at this point in the history
Add plp
  • Loading branch information
takenori-y committed Jan 9, 2024
2 parents e0dcbae + 3ce1318 commit a7ad25f
Show file tree
Hide file tree
Showing 21 changed files with 1,313 additions and 19 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ set(CC_SOURCES
${SOURCE_DIR}/analysis/mel_filter_bank_analysis.cc
${SOURCE_DIR}/analysis/mel_frequency_cepstral_coefficients_analysis.cc
${SOURCE_DIR}/analysis/mel_generalized_cepstral_analysis.cc
${SOURCE_DIR}/analysis/perceptual_linear_predictive_coefficients_analysis.cc
${SOURCE_DIR}/analysis/pitch_extraction.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_dio.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_harvest.cc
Expand Down Expand Up @@ -177,7 +178,9 @@ set(CC_SOURCES
${SOURCE_DIR}/math/gaussian_mixture_modeling.cc
${SOURCE_DIR}/math/histogram_calculation.cc
${SOURCE_DIR}/math/inverse_discrete_cosine_transform.cc
${SOURCE_DIR}/math/inverse_discrete_fourier_transform.cc
${SOURCE_DIR}/math/inverse_fast_fourier_transform.cc
${SOURCE_DIR}/math/inverse_fourier_transform.cc
${SOURCE_DIR}/math/levinson_durbin_recursion.cc
${SOURCE_DIR}/math/matrix.cc
${SOURCE_DIR}/math/matrix2d.cc
Expand Down Expand Up @@ -352,6 +355,7 @@ set(MAIN_SOURCES
${SOURCE_DIR}/main/pca.cc
${SOURCE_DIR}/main/pcas.cc
${SOURCE_DIR}/main/phase.cc
${SOURCE_DIR}/main/plp.cc
${SOURCE_DIR}/main/pitch.cc
${SOURCE_DIR}/main/pitch2sin.cc
${SOURCE_DIR}/main/pitch_mark.cc
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Changes from SPTK3
- Nonrecursive MLPG (`mlpg -R 1`)
- Pitch adaptive spectrum estimation (`pitch_spec`)
- Pitch extraction by DIO used in WORLD (`pitch -a 3`)
- PLP extraction (`plp`)
- Pole-zero plot (`gpolezero`)
- Scalar quantization (`quantize` and `dequantize`)
- Sinusoidal generation from pitch (`pitch2sin`)
Expand Down
2 changes: 1 addition & 1 deletion doc/main/mfcc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mfcc

.. doxygenfile:: mfcc.cc

.. seealso:: :ref:`fbank`
.. seealso:: :ref:`fbank` :ref:`plp`

.. doxygenclass:: sptk::MelFrequencyCepstralCoefficientsAnalysis
:members:
11 changes: 11 additions & 0 deletions doc/main/plp.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _plp:

plp
===

.. doxygenfile:: plp.cc

.. seealso:: :ref:`fbank` :ref:`mfcc`

.. doxygenclass:: sptk::PerceptualLinearPredictiveCoefficientsAnalysis
:members:
6 changes: 6 additions & 0 deletions include/SPTK/analysis/mel_filter_bank_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ class MelFilterBankAnalysis {
return is_valid_;
}

/**
* @param[out] center_frequencies Center frequencies in Hz.
* @return Boolean status; presumably true on success and false on failure,
*         as with the other methods of this API (confirm against the
*         implementation).
*/
bool GetCenterFrequencies(std::vector<double>* center_frequencies) const;

/**
* @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
* @param[out] filter_bank_output @f$C@f$-channel filter-bank outputs.
Expand All @@ -123,6 +128,7 @@ class MelFilterBankAnalysis {

int lower_bin_index_;
int upper_bin_index_;
std::vector<double> center_frequencies_;
std::vector<int> channel_indices_;
std::vector<double> channel_weights_;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// ------------------------------------------------------------------------ //
// Copyright 2021 SPTK Working Group //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
#define SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_

#include <vector> // std::vector

#include "SPTK/analysis/mel_filter_bank_analysis.h"
#include "SPTK/conversion/linear_predictive_coefficients_to_cepstrum.h"
#include "SPTK/math/inverse_fourier_transform.h"
#include "SPTK/math/levinson_durbin_recursion.h"
#include "SPTK/utils/sptk_utils.h"

namespace sptk {

/**
* Perform perceptual linear predictive (PLP) coefficients analysis.
*
* The input is the half part of power spectrum:
* @f[
* \begin{array}{cccc}
* |X(0)|^2, & |X(1)|^2, & \ldots, & |X(N/2)|^2,
* \end{array}
* @f]
* where @f$N@f$ is the FFT length. The outputs are the @f$M@f$-th order PLP
* features with the zeroth cepstral parameter:
* @f[
* \begin{array}{ccccc}
* c(0), & \bar{c}(1), & \bar{c}(2), & \ldots, & \bar{c}(M)
* \end{array}
* @f]
* and the log-signal energy @f$E@f$.
*
* [1] S. Young et al., &quot;The HTK book,&quot; Cambridge University
* Engineering Department, 2006.
*/
class PerceptualLinearPredictiveCoefficientsAnalysis {
public:
/**
* Buffer for PerceptualLinearPredictiveCoefficientsAnalysis class.
*
* Holds the working vectors and the Levinson-Durbin recursion buffer that
* are handed to Run() through its @c buffer argument. All members are
* private; only the enclosing analysis class can reach them via the friend
* declaration below.
*/
class Buffer {
public:
Buffer() {
}

virtual ~Buffer() {
}

private:
// Intermediate results (names suggest one vector per processing stage;
// the exact usage is defined in the implementation file).
std::vector<double> filter_bank_output_;
std::vector<double> spectrum_;
std::vector<double> cepstrum_;

// Real/imaginary input and output vectors, presumably for the inverse
// Fourier transform step -- confirm against the implementation.
std::vector<double> real_part_input_;
std::vector<double> real_part_output_;
std::vector<double> imag_part_input_;
std::vector<double> imag_part_output_;

LevinsonDurbinRecursion::Buffer buffer_for_levinson_durbin_recursion_;

friend class PerceptualLinearPredictiveCoefficientsAnalysis;
DISALLOW_COPY_AND_ASSIGN(Buffer);
};

/**
* @param[in] fft_length Number of FFT bins, @f$N@f$.
* @param[in] num_channel Number of channels, @f$C@f$.
* @param[in] num_order Order of cepstral coefficients, @f$M@f$.
* @param[in] liftering_coefficient A parameter of liftering, @f$L@f$.
* @param[in] compression_factor Amplitude compression factor.
* @param[in] sampling_rate Sampling rate in Hz.
* @param[in] lowest_frequency Lowest frequency in Hz.
* @param[in] highest_frequency Highest frequency in Hz.
* @param[in] floor Floor value of raw filter-bank output.
*
* Use IsValid() to query the validity flag of the constructed object.
*/
PerceptualLinearPredictiveCoefficientsAnalysis(
int fft_length, int num_channel, int num_order, int liftering_coefficient,
double compression_factor, double sampling_rate, double lowest_frequency,
double highest_frequency, double floor);

virtual ~PerceptualLinearPredictiveCoefficientsAnalysis() {
}

/**
* @return FFT length, as reported by the internal mel-filter-bank analysis.
*/
int GetFftLength() const {
return mel_filter_bank_analysis_.GetFftLength();
}

/**
* @return Number of channels, as reported by the internal mel-filter-bank
*         analysis.
*/
int GetNumChannel() const {
return mel_filter_bank_analysis_.GetNumChannel();
}

/**
* @return Order of cepstral coefficients, as reported by the internal
*         Levinson-Durbin recursion.
*/
int GetNumOrder() const {
return levinson_durbin_recursion_.GetNumOrder();
}

/**
* @return Liftering coefficient.
*/
int GetLifteringCoefficient() const {
return liftering_coefficient_;
}

/**
* @return Compression factor.
*/
double GetCompressionFactor() const {
return compression_factor_;
}

/**
* @return True if this object is valid.
*/
bool IsValid() const {
return is_valid_;
}

/**
* @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
* @param[out] plp @f$M@f$-th order PLP features.
* @param[out] energy Signal energy @f$E@f$ (optional).
* @param[out] buffer Buffer.
* @return True on success, false on failure.
*
* NOTE(review): "optional" presumably means @c energy may be a null
* pointer; confirm against the implementation before relying on it.
*/
bool Run(
const std::vector<double>& power_spectrum, std::vector<double>* plp,
double* energy,
PerceptualLinearPredictiveCoefficientsAnalysis::Buffer* buffer) const;

private:
// Values returned by GetLifteringCoefficient() and GetCompressionFactor().
const int liftering_coefficient_;
const double compression_factor_;

// Processing components. The declaration order of these members is also
// their initialization order, so it must stay in sync with the constructor.
const MelFilterBankAnalysis mel_filter_bank_analysis_;
const InverseFourierTransform inverse_fourier_transform_;
const LevinsonDurbinRecursion levinson_durbin_recursion_;
const LinearPredictiveCoefficientsToCepstrum
linear_predictive_coefficients_to_cepstrum_;

// Validity flag returned by IsValid().
bool is_valid_;

// Tables presumably precomputed by the constructor -- confirm against the
// implementation. NOTE(review): "cepstal" looks like a misspelling of
// "cepstral"; the name is kept because the implementation file refers to it.
std::vector<double> equal_loudness_curve_;
std::vector<double> cepstal_weights_;

DISALLOW_COPY_AND_ASSIGN(PerceptualLinearPredictiveCoefficientsAnalysis);
};

} // namespace sptk

#endif // SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
3 changes: 3 additions & 0 deletions include/SPTK/math/discrete_fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class DiscreteFourierTransform {

bool is_valid_;

std::vector<double> sine_table_;
std::vector<double> cosine_table_;

DISALLOW_COPY_AND_ASSIGN(DiscreteFourierTransform);
};

Expand Down
2 changes: 1 addition & 1 deletion include/SPTK/math/fast_fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace sptk {

/**
* Calculate DFT of complex-valued input data.
* Calculate FFT of complex-valued input data.
*
* The inputs are @f$M@f$-th order complex-valued data:
* @f[
Expand Down
4 changes: 2 additions & 2 deletions include/SPTK/math/fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ class FourierTransform {
}

/**
* @param[in,out] real_part Real part.
* @param[in,out] imag_part Imaginary part.
* @param[in,out] real_part @f$L@f$-length real part.
* @param[in,out] imag_part @f$L@f$-length imaginary part.
* @return True on success, false on failure.
*/
bool Run(std::vector<double>* real_part,
Expand Down
106 changes: 106 additions & 0 deletions include/SPTK/math/inverse_discrete_fourier_transform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// ------------------------------------------------------------------------ //
// Copyright 2021 SPTK Working Group //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
#define SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_

#include <vector> // std::vector

#include "SPTK/math/discrete_fourier_transform.h"
#include "SPTK/utils/sptk_utils.h"

namespace sptk {

/**
* Calculate inverse DFT of complex-valued input data.
*
* The inputs are @f$L@f$-length complex-valued data:
* @f[
* \begin{array}{cccc}
* \mathrm{Re}(X(0)), & \mathrm{Re}(X(1)), & \ldots, & \mathrm{Re}(X(L-1)), \\
* \mathrm{Im}(X(0)), & \mathrm{Im}(X(1)), & \ldots, & \mathrm{Im}(X(L-1)).
* \end{array}
* @f]
* The outputs are
* @f[
* \begin{array}{cccc}
* \mathrm{Re}(x(0)), & \mathrm{Re}(x(1)), & \ldots, & \mathrm{Re}(x(L-1)), \\
* \mathrm{Im}(x(0)), & \mathrm{Im}(x(1)), & \ldots, & \mathrm{Im}(x(L-1)).
* \end{array}
* @f]
* They are computed as
* @f[
* x(n) = \frac{1}{L} \sum_{k=0}^{L-1} X(k) e^{j2\pi nk / L}.
* @f]
*/
class InverseDiscreteFourierTransform {
public:
/**
* @param[in] dft_length DFT length, @f$L@f$.
*/
explicit InverseDiscreteFourierTransform(int dft_length);

virtual ~InverseDiscreteFourierTransform() {
}

/**
* @return DFT length.
*/
int GetDftLength() const {
return dft_length_;
}

/**
* @return True if this object is valid.
*/
bool IsValid() const {
return is_valid_;
}

/**
* Out-of-place variant: the inputs are read-only and the result is written
* to separate output vectors.
*
* @param[in] real_part_input @f$L@f$-length real part of input.
* @param[in] imag_part_input @f$L@f$-length imaginary part of input.
* @param[out] real_part_output @f$L@f$-length real part of output.
* @param[out] imag_part_output @f$L@f$-length imaginary part of output.
* @return True on success, false on failure.
*/
bool Run(const std::vector<double>& real_part_input,
const std::vector<double>& imag_part_input,
std::vector<double>* real_part_output,
std::vector<double>* imag_part_output) const;

/**
* In-place variant: the given vectors serve as both input and output.
*
* @param[in,out] real_part @f$L@f$-length real part.
* @param[in,out] imag_part @f$L@f$-length imaginary part.
* @return True on success, false on failure.
*/
bool Run(std::vector<double>* real_part,
std::vector<double>* imag_part) const;

private:
// DFT length returned by GetDftLength().
const int dft_length_;

// Validity flag returned by IsValid().
bool is_valid_;

// Sine/cosine lookup tables; presumably filled once by the constructor so
// that Run() need not call trigonometric functions -- confirm against the
// implementation.
std::vector<double> sine_table_;
std::vector<double> cosine_table_;

DISALLOW_COPY_AND_ASSIGN(InverseDiscreteFourierTransform);
};

} // namespace sptk

#endif // SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
2 changes: 1 addition & 1 deletion include/SPTK/math/inverse_fast_fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
namespace sptk {

/**
* Calculate inverse DFT of complex-valued input data.
* Calculate inverse FFT of complex-valued input data.
*
* The inputs are @f$M@f$-th order complex-valued data:
* @f[
Expand Down
Loading

0 comments on commit a7ad25f

Please sign in to comment.