Merge pull request #55 from sp-nitech/plp

Add plp
sp-nitech · Jan 9, 2024 · a7ad25f · a7ad25f
2 parents e0dcbae + 3ce1318
commit a7ad25f
Show file tree

Hide file tree

Showing 21 changed files with 1,313 additions and 19 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -80,6 +80,7 @@ set(CC_SOURCES
   ${SOURCE_DIR}/analysis/mel_filter_bank_analysis.cc
   ${SOURCE_DIR}/analysis/mel_frequency_cepstral_coefficients_analysis.cc
   ${SOURCE_DIR}/analysis/mel_generalized_cepstral_analysis.cc
+  ${SOURCE_DIR}/analysis/perceptual_linear_predictive_coefficients_analysis.cc
   ${SOURCE_DIR}/analysis/pitch_extraction.cc
   ${SOURCE_DIR}/analysis/pitch_extraction_by_dio.cc
   ${SOURCE_DIR}/analysis/pitch_extraction_by_harvest.cc
@@ -177,7 +178,9 @@ set(CC_SOURCES
   ${SOURCE_DIR}/math/gaussian_mixture_modeling.cc
   ${SOURCE_DIR}/math/histogram_calculation.cc
   ${SOURCE_DIR}/math/inverse_discrete_cosine_transform.cc
+  ${SOURCE_DIR}/math/inverse_discrete_fourier_transform.cc
   ${SOURCE_DIR}/math/inverse_fast_fourier_transform.cc
+  ${SOURCE_DIR}/math/inverse_fourier_transform.cc
   ${SOURCE_DIR}/math/levinson_durbin_recursion.cc
   ${SOURCE_DIR}/math/matrix.cc
   ${SOURCE_DIR}/math/matrix2d.cc
@@ -352,6 +355,7 @@ set(MAIN_SOURCES
   ${SOURCE_DIR}/main/pca.cc
   ${SOURCE_DIR}/main/pcas.cc
   ${SOURCE_DIR}/main/phase.cc
+  ${SOURCE_DIR}/main/plp.cc
   ${SOURCE_DIR}/main/pitch.cc
   ${SOURCE_DIR}/main/pitch2sin.cc
   ${SOURCE_DIR}/main/pitch_mark.cc

diff --git a/README.md b/README.md
@@ -136,6 +136,7 @@ Changes from SPTK3
   - Nonrecursive MLPG (`mlpg -R 1`)
   - Pitch adaptive spectrum estimation (`pitch_spec`)
   - Pitch extraction by DIO used in WORLD (`pitch -a 3`)
+  - PLP extraction (`plp`)
   - Pole-zero plot (`gpolezero`)
   - Scalar quantization (`quantize` and `dequantize`)
   - Sinusoidal generation from pitch (`pitch2sin`)

diff --git a/doc/main/mfcc.rst b/doc/main/mfcc.rst
@@ -5,7 +5,7 @@ mfcc
 
 .. doxygenfile:: mfcc.cc
 
-.. seealso:: :ref:`fbank`
+.. seealso:: :ref:`fbank`  :ref:`plp`
 
 .. doxygenclass:: sptk::MelFrequencyCepstralCoefficientsAnalysis
    :members:
diff --git a/doc/main/plp.rst b/doc/main/plp.rst
@@ -0,0 +1,11 @@
+.. _plp:
+
+plp
+===
+
+.. doxygenfile:: plp.cc
+
+.. seealso:: :ref:`fbank`  :ref:`mfcc`
+
+.. doxygenclass:: sptk::PerceptualLinearPredictiveCoefficientsAnalysis
+   :members:
diff --git a/include/SPTK/analysis/mel_filter_bank_analysis.h b/include/SPTK/analysis/mel_filter_bank_analysis.h
@@ -104,6 +104,11 @@ class MelFilterBankAnalysis {
     return is_valid_;
   }
 
+  /**
+   * @param[out] center_frequencies Center frequencies in Hz.
+   * @return True on success, false on failure.
+   */
+  bool GetCenterFrequencies(std::vector<double>* center_frequencies) const;
+
   /**
    * @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
    * @param[out] filter_bank_output @f$C@f$-channel filter-bank outputs.
@@ -123,6 +128,7 @@ class MelFilterBankAnalysis {
 
   int lower_bin_index_;
   int upper_bin_index_;
+  std::vector<double> center_frequencies_;
   std::vector<int> channel_indices_;
   std::vector<double> channel_weights_;
 

diff --git a/include/SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h b/include/SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h
@@ -0,0 +1,173 @@
+// ------------------------------------------------------------------------ //
+// Copyright 2021 SPTK Working Group                                        //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ------------------------------------------------------------------------ //
+
+#ifndef SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
+#define SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
+
+#include <vector>  // std::vector
+
+#include "SPTK/analysis/mel_filter_bank_analysis.h"
+#include "SPTK/conversion/linear_predictive_coefficients_to_cepstrum.h"
+#include "SPTK/math/inverse_fourier_transform.h"
+#include "SPTK/math/levinson_durbin_recursion.h"
+#include "SPTK/utils/sptk_utils.h"
+
+namespace sptk {
+
+/**
+ * Perform perceptual linear predictive (PLP) coefficients analysis.
+ *
+ * The input is the half part of power spectrum:
+ * @f[
+ *   \begin{array}{cccc}
+ *     |X(0)|^2, & |X(1)|^2, & \ldots, & |X(N/2)|^2,
+ *   \end{array}
+ * @f]
+ * where @f$N@f$ is the FFT length. The outputs are the @f$M@f$-th order PLP
+ * features with the zeroth cepstral parameter:
+ * @f[
+ *   \begin{array}{ccccc}
+ *     c(0), & \bar{c}(1), & \bar{c}(2), & \ldots, & \bar{c}(M)
+ *   \end{array}
+ * @f]
+ * and the log-signal energy @f$E@f$.
+ *
+ * [1] S. Young et al., &quot;The HTK book,&quot; Cambridge University
+ *     Engineering Department, 2006.
+ */
+class PerceptualLinearPredictiveCoefficientsAnalysis {
+ public:
+  /**
+   * Buffer for PerceptualLinearPredictiveCoefficientsAnalysis class.
+   *
+   * Holds working vectors and a Levinson-Durbin recursion buffer. An instance
+   * is handed to the const Run() method through its buffer argument.
+   */
+  class Buffer {
+   public:
+    Buffer() {
+    }
+
+    virtual ~Buffer() {
+    }
+
+   private:
+    std::vector<double> filter_bank_output_;
+    std::vector<double> spectrum_;
+    std::vector<double> cepstrum_;
+
+    std::vector<double> real_part_input_;
+    std::vector<double> real_part_output_;
+    std::vector<double> imag_part_input_;
+    std::vector<double> imag_part_output_;
+
+    LevinsonDurbinRecursion::Buffer buffer_for_levinson_durbin_recursion_;
+
+    friend class PerceptualLinearPredictiveCoefficientsAnalysis;
+    DISALLOW_COPY_AND_ASSIGN(Buffer);
+  };
+
+  /**
+   * @param[in] fft_length Number of FFT bins, @f$N@f$.
+   * @param[in] num_channel Number of channels, @f$C@f$.
+   * @param[in] num_order Order of cepstral coefficients, @f$M@f$.
+   * @param[in] liftering_coefficient A parameter of liftering, @f$L@f$.
+   * @param[in] compression_factor Amplitude compression factor.
+   * @param[in] sampling_rate Sampling rate in Hz.
+   * @param[in] lowest_frequency Lowest frequency in Hz.
+   * @param[in] highest_frequency Highest frequency in Hz.
+   * @param[in] floor Floor value of raw filter-bank output.
+   */
+  PerceptualLinearPredictiveCoefficientsAnalysis(
+      int fft_length, int num_channel, int num_order, int liftering_coefficient,
+      double compression_factor, double sampling_rate, double lowest_frequency,
+      double highest_frequency, double floor);
+
+  virtual ~PerceptualLinearPredictiveCoefficientsAnalysis() {
+  }
+
+  /**
+   * @return FFT length.
+   */
+  int GetFftLength() const {
+    return mel_filter_bank_analysis_.GetFftLength();
+  }
+
+  /**
+   * @return Number of channels.
+   */
+  int GetNumChannel() const {
+    return mel_filter_bank_analysis_.GetNumChannel();
+  }
+
+  /**
+   * @return Order of cepstral coefficients.
+   */
+  int GetNumOrder() const {
+    return levinson_durbin_recursion_.GetNumOrder();
+  }
+
+  /**
+   * @return Liftering coefficient.
+   */
+  int GetLifteringCoefficient() const {
+    return liftering_coefficient_;
+  }
+
+  /**
+   * @return Compression factor.
+   */
+  double GetCompressionFactor() const {
+    return compression_factor_;
+  }
+
+  /**
+   * @return True if this object is valid.
+   */
+  bool IsValid() const {
+    return is_valid_;
+  }
+
+  /**
+   * @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
+   * @param[out] plp @f$M@f$-th order PLP features.
+   * @param[out] energy Signal energy @f$E@f$ (optional).
+   * @param[out] buffer Buffer.
+   * @return True on success, false on failure.
+   */
+  bool Run(
+      const std::vector<double>& power_spectrum, std::vector<double>* plp,
+      double* energy,
+      PerceptualLinearPredictiveCoefficientsAnalysis::Buffer* buffer) const;
+
+ private:
+  const int liftering_coefficient_;
+  const double compression_factor_;
+
+  const MelFilterBankAnalysis mel_filter_bank_analysis_;
+  const InverseFourierTransform inverse_fourier_transform_;
+  const LevinsonDurbinRecursion levinson_durbin_recursion_;
+  const LinearPredictiveCoefficientsToCepstrum
+      linear_predictive_coefficients_to_cepstrum_;
+
+  bool is_valid_;
+
+  std::vector<double> equal_loudness_curve_;
+  std::vector<double> cepstal_weights_;
+
+  DISALLOW_COPY_AND_ASSIGN(PerceptualLinearPredictiveCoefficientsAnalysis);
+};
+
+}  // namespace sptk
+
+#endif  // SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
diff --git a/include/SPTK/math/discrete_fourier_transform.h b/include/SPTK/math/discrete_fourier_transform.h
@@ -94,6 +94,9 @@ class DiscreteFourierTransform {
 
   bool is_valid_;
 
+  std::vector<double> sine_table_;
+  std::vector<double> cosine_table_;
+
   DISALLOW_COPY_AND_ASSIGN(DiscreteFourierTransform);
 };
 

diff --git a/include/SPTK/math/fast_fourier_transform.h b/include/SPTK/math/fast_fourier_transform.h
@@ -24,7 +24,7 @@
 namespace sptk {
 
 /**
- * Calculate DFT of complex-valued input data.
+ * Calculate FFT of complex-valued input data.
  *
  * The inputs are @f$M@f$-th order complex-valued data:
  * @f[

diff --git a/include/SPTK/math/fourier_transform.h b/include/SPTK/math/fourier_transform.h
@@ -106,8 +106,8 @@ class FourierTransform {
   }
 
   /**
-   * @param[in,out] real_part Real part.
-   * @param[in,out] imag_part Imaginary part.
+   * @param[in,out] real_part @f$L@f$-length real part.
+   * @param[in,out] imag_part @f$L@f$-length imaginary part.
    * @return True on success, false on failure.
    */
   bool Run(std::vector<double>* real_part,

diff --git a/include/SPTK/math/inverse_discrete_fourier_transform.h b/include/SPTK/math/inverse_discrete_fourier_transform.h
@@ -0,0 +1,106 @@
+// ------------------------------------------------------------------------ //
+// Copyright 2021 SPTK Working Group                                        //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ------------------------------------------------------------------------ //
+
+#ifndef SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
+#define SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
+
+#include <vector>  // std::vector
+
+#include "SPTK/math/discrete_fourier_transform.h"
+#include "SPTK/utils/sptk_utils.h"
+
+namespace sptk {
+
+/**
+ * Calculate inverse DFT of complex-valued input data.
+ *
+ * The inputs are @f$L@f$-length complex-valued data:
+ * @f[
+ *   \begin{array}{cccc}
+ *   \mathrm{Re}(X(0)), & \mathrm{Re}(X(1)), & \ldots, & \mathrm{Re}(X(L-1)), \\
+ *   \mathrm{Im}(X(0)), & \mathrm{Im}(X(1)), & \ldots, & \mathrm{Im}(X(L-1)).
+ *   \end{array}
+ * @f]
+ * The outputs are
+ * @f[
+ *   \begin{array}{cccc}
+ *   \mathrm{Re}(x(0)), & \mathrm{Re}(x(1)), & \ldots, & \mathrm{Re}(x(L-1)), \\
+ *   \mathrm{Im}(x(0)), & \mathrm{Im}(x(1)), & \ldots, & \mathrm{Im}(x(L-1)).
+ *   \end{array}
+ * @f]
+ * They are computed as
+ * @f[
+ *   x(n) = \frac{1}{L} \sum_{k=0}^{L-1} X(k) e^{j2\pi nk / L}.
+ * @f]
+ */
+class InverseDiscreteFourierTransform {
+ public:
+  /**
+   * @param[in] dft_length DFT length, @f$L@f$.
+   */
+  explicit InverseDiscreteFourierTransform(int dft_length);
+
+  virtual ~InverseDiscreteFourierTransform() {
+  }
+
+  /**
+   * @return DFT length.
+   */
+  int GetDftLength() const {
+    return dft_length_;
+  }
+
+  /**
+   * @return True if this object is valid.
+   */
+  bool IsValid() const {
+    return is_valid_;
+  }
+
+  /**
+   * @param[in] real_part_input @f$L@f$-length real part of input.
+   * @param[in] imag_part_input @f$L@f$-length imaginary part of input.
+   * @param[out] real_part_output @f$L@f$-length real part of output.
+   * @param[out] imag_part_output @f$L@f$-length imaginary part of output.
+   * @return True on success, false on failure.
+   */
+  bool Run(const std::vector<double>& real_part_input,
+           const std::vector<double>& imag_part_input,
+           std::vector<double>* real_part_output,
+           std::vector<double>* imag_part_output) const;
+
+  /**
+   * @param[in,out] real_part @f$L@f$-length real part.
+   * @param[in,out] imag_part @f$L@f$-length imaginary part.
+   * @return True on success, false on failure.
+   */
+  bool Run(std::vector<double>* real_part,
+           std::vector<double>* imag_part) const;
+
+ private:
+  const int dft_length_;
+
+  bool is_valid_;
+
+  std::vector<double> sine_table_;
+  std::vector<double> cosine_table_;
+
+  DISALLOW_COPY_AND_ASSIGN(InverseDiscreteFourierTransform);
+};
+
+}  // namespace sptk
+
+#endif  // SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
diff --git a/include/SPTK/math/inverse_fast_fourier_transform.h b/include/SPTK/math/inverse_fast_fourier_transform.h
@@ -25,7 +25,7 @@
 namespace sptk {
 
 /**
- * Calculate inverse DFT of complex-valued input data.
+ * Calculate inverse FFT of complex-valued input data.
  *
  * The inputs are @f$M@f$-th order complex-valued data:
  * @f[