diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cbd789..b5806a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,7 @@ set(CC_SOURCES ${SOURCE_DIR}/analysis/mel_filter_bank_analysis.cc ${SOURCE_DIR}/analysis/mel_frequency_cepstral_coefficients_analysis.cc ${SOURCE_DIR}/analysis/mel_generalized_cepstral_analysis.cc + ${SOURCE_DIR}/analysis/perceptual_linear_predictive_coefficients_analysis.cc ${SOURCE_DIR}/analysis/pitch_extraction.cc ${SOURCE_DIR}/analysis/pitch_extraction_by_dio.cc ${SOURCE_DIR}/analysis/pitch_extraction_by_harvest.cc @@ -177,7 +178,9 @@ set(CC_SOURCES ${SOURCE_DIR}/math/gaussian_mixture_modeling.cc ${SOURCE_DIR}/math/histogram_calculation.cc ${SOURCE_DIR}/math/inverse_discrete_cosine_transform.cc + ${SOURCE_DIR}/math/inverse_discrete_fourier_transform.cc ${SOURCE_DIR}/math/inverse_fast_fourier_transform.cc + ${SOURCE_DIR}/math/inverse_fourier_transform.cc ${SOURCE_DIR}/math/levinson_durbin_recursion.cc ${SOURCE_DIR}/math/matrix.cc ${SOURCE_DIR}/math/matrix2d.cc @@ -352,6 +355,7 @@ set(MAIN_SOURCES ${SOURCE_DIR}/main/pca.cc ${SOURCE_DIR}/main/pcas.cc ${SOURCE_DIR}/main/phase.cc + ${SOURCE_DIR}/main/plp.cc ${SOURCE_DIR}/main/pitch.cc ${SOURCE_DIR}/main/pitch2sin.cc ${SOURCE_DIR}/main/pitch_mark.cc diff --git a/README.md b/README.md index f7fb050..0cfa0fd 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ Changes from SPTK3 - Nonrecursive MLPG (`mlpg -R 1`) - Pitch adaptive spectrum estimation (`pitch_spec`) - Pitch extraction by DIO used in WORLD (`pitch -a 3`) + - PLP extraction (`plp`) - Pole-zero plot (`gpolezero`) - Scalar quantization (`quantize` and `dequantize`) - Sinusoidal generation from pitch (`pitch2sin`) diff --git a/doc/main/mfcc.rst b/doc/main/mfcc.rst index ecabd72..ca93803 100644 --- a/doc/main/mfcc.rst +++ b/doc/main/mfcc.rst @@ -5,7 +5,7 @@ mfcc .. doxygenfile:: mfcc.cc -.. seealso:: :ref:`fbank` +.. seealso:: :ref:`fbank` :ref:`plp` .. doxygenclass:: sptk::MelFrequencyCepstralCoefficientsAnalysis :members: diff --git a/doc/main/plp.rst b/doc/main/plp.rst new file mode 100644 index 0000000..f1ea681 --- /dev/null +++ b/doc/main/plp.rst @@ -0,0 +1,11 @@ +.. _plp: + +plp +=== + +.. doxygenfile:: plp.cc + +.. seealso:: :ref:`fbank` :ref:`mfcc` + +.. doxygenclass:: sptk::PerceptualLinearPredictiveCoefficientsAnalysis + :members: diff --git a/include/SPTK/analysis/mel_filter_bank_analysis.h b/include/SPTK/analysis/mel_filter_bank_analysis.h index ceaf08c..bd36852 100644 --- a/include/SPTK/analysis/mel_filter_bank_analysis.h +++ b/include/SPTK/analysis/mel_filter_bank_analysis.h @@ -104,6 +104,11 @@ class MelFilterBankAnalysis { return is_valid_; } + /** + * @return Center frequencies in Hz. + */ + bool GetCenterFrequencies(std::vector* center_frequencies) const; + /** * @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum. * @param[out] filter_bank_output @f$C@f$-channel filter-bank outputs. @@ -123,6 +128,7 @@ class MelFilterBankAnalysis { int lower_bin_index_; int upper_bin_index_; + std::vector center_frequencies_; std::vector channel_indices_; std::vector channel_weights_; diff --git a/include/SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h b/include/SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h new file mode 100644 index 0000000..5026e5e --- /dev/null +++ b/include/SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h @@ -0,0 +1,173 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_ +#define SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_ + +#include // std::vector + +#include "SPTK/analysis/mel_filter_bank_analysis.h" +#include "SPTK/conversion/linear_predictive_coefficients_to_cepstrum.h" +#include "SPTK/math/inverse_fourier_transform.h" +#include "SPTK/math/levinson_durbin_recursion.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Perform perceptual linear predictive (PLP) coefficients analysis. + * + * The input is the half part of power spectrum: + * @f[ + * \begin{array}{cccc} + * |X(0)|^2, & |X(1)|^2, & \ldots, & |X(N/2)|^2, + * \end{array} + * @f] + * where @f$N@f$ is the FFT length. The outputs are the @f$M@f$-th order PLP + * features with the zeroth cepstral parameter: + * @f[ + * \begin{array}{ccccc} + * c(0), & \bar{c}(1), & \bar{c}(2), & \ldots, & \bar{c}(M) + * \end{array} + * @f] + * and the log-signal energy @f$E@f$. + * + * [1] S. Young et al., "The HTK book," Cambridge University + * Engineering Department, 2006. + */ +class PerceptualLinearPredictiveCoefficientsAnalysis { + public: + /** + * Buffer for PerceptualLinearPredictiveCoefficientsAnalysis class. + */ + class Buffer { + public: + Buffer() { + } + + virtual ~Buffer() { + } + + private: + std::vector filter_bank_output_; + std::vector spectrum_; + std::vector cepstrum_; + + std::vector real_part_input_; + std::vector real_part_output_; + std::vector imag_part_input_; + std::vector imag_part_output_; + + LevinsonDurbinRecursion::Buffer buffer_for_levinson_durbin_recursion_; + + friend class PerceptualLinearPredictiveCoefficientsAnalysis; + DISALLOW_COPY_AND_ASSIGN(Buffer); + }; + + /** + * @param[in] fft_length Number of FFT bins, @f$N@f$. + * @param[in] num_channel Number of channels, @f$C@f$. + * @param[in] num_order Order of cepstral coefficients, @f$M@f$. + * @param[in] liftering_coefficient A parameter of liftering, @f$L@f$. + * @param[in] compression_factor Amplitude compression factor. + * @param[in] sampling_rate Sampling rate in Hz. + * @param[in] lowest_frequency Lowest frequency in Hz. + * @param[in] highest_frequency Highest frequency in Hz. + * @param[in] floor Floor value of raw filter-bank output. + */ + PerceptualLinearPredictiveCoefficientsAnalysis( + int fft_length, int num_channel, int num_order, int liftering_coefficient, + double compression_factor, double sampling_rate, double lowest_frequency, + double highest_frequency, double floor); + + virtual ~PerceptualLinearPredictiveCoefficientsAnalysis() { + } + + /** + * @return FFT size. + */ + int GetFftLength() const { + return mel_filter_bank_analysis_.GetFftLength(); + } + + /** + * @return Number of channels. + */ + int GetNumChannel() const { + return mel_filter_bank_analysis_.GetNumChannel(); + } + + /** + * @return Order of cepstral coefficients. + */ + int GetNumOrder() const { + return levinson_durbin_recursion_.GetNumOrder(); + } + + /** + * @return Liftering coefficient. + */ + int GetLifteringCoefficient() const { + return liftering_coefficient_; + } + + /** + * @return Compression factor. + */ + double GetCompressionFactor() const { + return compression_factor_; + } + + /** + * @return True if this object is valid. + */ + bool IsValid() const { + return is_valid_; + } + + /** + * @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum. + * @param[out] plp @f$M@f$-th order PLP features. + * @param[out] energy Signal energy @f$E@f$ (optional). + * @param[out] buffer Buffer. + * @return True on success, false on failure. + */ + bool Run( + const std::vector& power_spectrum, std::vector* plp, + double* energy, + PerceptualLinearPredictiveCoefficientsAnalysis::Buffer* buffer) const; + + private: + const int liftering_coefficient_; + const double compression_factor_; + + const MelFilterBankAnalysis mel_filter_bank_analysis_; + const InverseFourierTransform inverse_fourier_transform_; + const LevinsonDurbinRecursion levinson_durbin_recursion_; + const LinearPredictiveCoefficientsToCepstrum + linear_predictive_coefficients_to_cepstrum_; + + bool is_valid_; + + std::vector equal_loudness_curve_; + std::vector cepstal_weights_; + + DISALLOW_COPY_AND_ASSIGN(PerceptualLinearPredictiveCoefficientsAnalysis); +}; + +} // namespace sptk + +#endif // SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_ diff --git a/include/SPTK/math/discrete_fourier_transform.h b/include/SPTK/math/discrete_fourier_transform.h index bc61373..a61b221 100644 --- a/include/SPTK/math/discrete_fourier_transform.h +++ b/include/SPTK/math/discrete_fourier_transform.h @@ -94,6 +94,9 @@ class DiscreteFourierTransform { bool is_valid_; + std::vector sine_table_; + std::vector cosine_table_; + DISALLOW_COPY_AND_ASSIGN(DiscreteFourierTransform); }; diff --git a/include/SPTK/math/fast_fourier_transform.h b/include/SPTK/math/fast_fourier_transform.h index 30205ed..5ff8498 100644 --- a/include/SPTK/math/fast_fourier_transform.h +++ b/include/SPTK/math/fast_fourier_transform.h @@ -24,7 +24,7 @@ namespace sptk { /** - * Calculate DFT of complex-valued input data. + * Calculate FFT of complex-valued input data. * * The inputs are @f$M@f$-th order complex-valued data: * @f[ diff --git a/include/SPTK/math/fourier_transform.h b/include/SPTK/math/fourier_transform.h index aa23de1..450a923 100644 --- a/include/SPTK/math/fourier_transform.h +++ b/include/SPTK/math/fourier_transform.h @@ -106,8 +106,8 @@ class FourierTransform { } /** - * @param[in,out] real_part Real part. - * @param[in,out] imag_part Imaginary part. + * @param[in,out] real_part @f$L@f$-length real part. + * @param[in,out] imag_part @f$L@f$-length imaginary part. * @return True on success, false on failure. */ bool Run(std::vector* real_part, diff --git a/include/SPTK/math/inverse_discrete_fourier_transform.h b/include/SPTK/math/inverse_discrete_fourier_transform.h new file mode 100644 index 0000000..d213fc7 --- /dev/null +++ b/include/SPTK/math/inverse_discrete_fourier_transform.h @@ -0,0 +1,106 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_ +#define SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_ + +#include // std::vector + +#include "SPTK/math/discrete_fourier_transform.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Calculate inverse DFT of complex-valued input data. + * + * The inputs are @f$L@f$-length complex-valued data: + * @f[ + * \begin{array}{cccc} + * \mathrm{Re}(X(0)), & \mathrm{Re}(X(1)), & \ldots, & \mathrm{Re}(X(L-1)), \\ + * \mathrm{Im}(X(0)), & \mathrm{Im}(X(1)), & \ldots, & \mathrm{Im}(X(L-1)). + * \end{array} + * @f] + * The outputs are + * @f[ + * \begin{array}{cccc} + * \mathrm{Re}(x(0)), & \mathrm{Re}(x(1)), & \ldots, & \mathrm{Re}(x(L-1)), \\ + * \mathrm{Im}(x(0)), & \mathrm{Im}(x(1)), & \ldots, & \mathrm{Im}(x(L-1)). + * \end{array} + * @f] + * They are computed as + * @f[ + * x(n) = \frac{1}{L} \sum_{n=0}^{L-1} X(k) e^{j2\pi nk / L}. + * @f] + */ +class InverseDiscreteFourierTransform { + public: + /** + * @param[in] dft_length DFT length, @f$L@f$. + */ + explicit InverseDiscreteFourierTransform(int dft_length); + + virtual ~InverseDiscreteFourierTransform() { + } + + /** + * @return DFT length. + */ + int GetDftLength() const { + return dft_length_; + } + + /** + * @return True if this object is valid. + */ + bool IsValid() const { + return is_valid_; + } + + /** + * @param[in] real_part_input @f$L@f$-length real part of input. + * @param[in] imag_part_input @f$L@f$-length imaginary part of input. + * @param[out] real_part_output @f$L@f$-length real part of output. + * @param[out] imag_part_output @f$L@f$-length iaginary part of output. + * @return True on success, false on failure. + */ + bool Run(const std::vector& real_part_input, + const std::vector& imag_part_input, + std::vector* real_part_output, + std::vector* imag_part_output) const; + + /** + * @param[in,out] real_part @f$L@f$-length real part. + * @param[in,out] imag_part @f$L@f$-length imaginary part. + * @return True on success, false on failure. + */ + bool Run(std::vector* real_part, + std::vector* imag_part) const; + + private: + const int dft_length_; + + bool is_valid_; + + std::vector sine_table_; + std::vector cosine_table_; + + DISALLOW_COPY_AND_ASSIGN(InverseDiscreteFourierTransform); +}; + +} // namespace sptk + +#endif // SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_ diff --git a/include/SPTK/math/inverse_fast_fourier_transform.h b/include/SPTK/math/inverse_fast_fourier_transform.h index 95bcba9..44394cf 100644 --- a/include/SPTK/math/inverse_fast_fourier_transform.h +++ b/include/SPTK/math/inverse_fast_fourier_transform.h @@ -25,7 +25,7 @@ namespace sptk { /** - * Calculate inverse DFT of complex-valued input data. + * Calculate inverse FFT of complex-valued input data. * * The inputs are @f$M@f$-th order complex-valued data: * @f[ diff --git a/include/SPTK/math/inverse_fourier_transform.h b/include/SPTK/math/inverse_fourier_transform.h new file mode 100644 index 0000000..772624d --- /dev/null +++ b/include/SPTK/math/inverse_fourier_transform.h @@ -0,0 +1,88 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_MATH_INVERSE_FOURIER_TRANSFORM_H_ +#define SPTK_MATH_INVERSE_FOURIER_TRANSFORM_H_ + +#include // std::vector + +#include "SPTK/math/fourier_transform.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Inverse Fourier transform wrapper. + */ +class InverseFourierTransform { + public: + /** + * @param[in] length DFT length, @f$L@f$. + */ + explicit InverseFourierTransform(int length); + + ~InverseFourierTransform() { + delete inverse_fourier_transform_; + } + + /** + * @return DFT length. + */ + int GetLength() const { + return inverse_fourier_transform_->GetLength(); + } + + /** + * @return True if this object is valid. + */ + bool IsValid() const { + return inverse_fourier_transform_->IsValid(); + } + + /** + * @param[in] real_part_input @f$L@f$-length real part of input. + * @param[in] imag_part_input @f$L@f$-length imaginary part of input. + * @param[out] real_part_output @f$L@f$-length real part of output. + * @param[out] imag_part_output @f$L@f$-length imaginary part of output. + * @return True on success, false on failure. + */ + bool Run(const std::vector& real_part_input, + const std::vector& imag_part_input, + std::vector* real_part_output, + std::vector* imag_part_output) const { + return inverse_fourier_transform_->Run(real_part_input, imag_part_input, + real_part_output, imag_part_output); + } + + /** + * @param[in,out] real_part @f$L@f$-length real part. + * @param[in,out] imag_part @f$L@f$-length imaginary part. + * @return True on success, false on failure. + */ + bool Run(std::vector* real_part, + std::vector* imag_part) const { + return inverse_fourier_transform_->Run(real_part, imag_part); + } + + private: + FourierTransform::FourierTransformInterface* inverse_fourier_transform_; + + DISALLOW_COPY_AND_ASSIGN(InverseFourierTransform); +}; + +} // namespace sptk + +#endif // SPTK_MATH_INVERSE_FOURIER_TRANSFORM_H_ diff --git a/src/analysis/mel_filter_bank_analysis.cc b/src/analysis/mel_filter_bank_analysis.cc index 9032d5a..5d6ab15 100644 --- a/src/analysis/mel_filter_bank_analysis.cc +++ b/src/analysis/mel_filter_bank_analysis.cc @@ -17,7 +17,7 @@ #include "SPTK/analysis/mel_filter_bank_analysis.h" #include // std::fill, std::max, std::min -#include // std::log, std::sqrt +#include // std::exp, std::log, std::sqrt #include // std::size_t #include // std::accumulate @@ -25,8 +25,11 @@ namespace { // Note that HTK use 1127 instead of 1127.01048. double HzToMel(double hz) { - // return 1127.01048 * std::log(hz / 700.0 + 1.0); - return 1127 * std::log(hz / 700.0 + 1.0); + return 1127.0 * std::log(hz / 700.0 + 1.0); +} + +double MelToHz(double mel) { + return 700.0 * (std::exp(mel / 1127.0) - 1.0); } double SampleMel(int index, int fft_length, double sampling_rate) { @@ -74,8 +77,8 @@ MelFilterBankAnalysis::MelFilterBankAnalysis(int fft_length, int num_channel, const double mel_high(HzToMel(highest_frequency)); // Create vector of filter-bank center frequencies. - std::vector center_frequencies(num_channel_ + 1); - double* cf(&(center_frequencies[0])); + center_frequencies_.resize(num_channel_ + 1); + double* cf(&(center_frequencies_[0])); { const double diff(mel_high - mel_low); for (int m(0); m <= num_channel_; ++m) { @@ -108,6 +111,24 @@ MelFilterBankAnalysis::MelFilterBankAnalysis(int fft_length, int num_channel, } } +bool MelFilterBankAnalysis::GetCenterFrequencies( + std::vector* center_frequencies) const { + if (!is_valid_ || NULL == center_frequencies) { + return false; + } + + if (center_frequencies->size() != + static_cast(num_channel_ + 1)) { + center_frequencies->resize(num_channel_ + 1); + } + + for (int m(0); m <= num_channel_; ++m) { + (*center_frequencies)[m] = MelToHz(center_frequencies_[m]); + } + + return true; +} + bool MelFilterBankAnalysis::Run(const std::vector& power_spectrum, std::vector* filter_bank_output, double* energy) const { diff --git a/src/analysis/perceptual_linear_predictive_coefficients_analysis.cc b/src/analysis/perceptual_linear_predictive_coefficients_analysis.cc new file mode 100644 index 0000000..1b70444 --- /dev/null +++ b/src/analysis/perceptual_linear_predictive_coefficients_analysis.cc @@ -0,0 +1,150 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h" + +#include // std::copy, std::reverse_copy, std::transform +#include // std::exp, std::pow, std::sin +#include // std::size_t + +namespace sptk { + +PerceptualLinearPredictiveCoefficientsAnalysis:: + PerceptualLinearPredictiveCoefficientsAnalysis( + int fft_length, int num_channel, int num_order, + int liftering_coefficient, double compression_factor, + double sampling_rate, double lowest_frequency, double highest_frequency, + double floor) + : liftering_coefficient_(liftering_coefficient), + compression_factor_(compression_factor), + mel_filter_bank_analysis_(fft_length, num_channel, sampling_rate, + lowest_frequency, highest_frequency, floor, + true), + inverse_fourier_transform_(2 * GetNumChannel() + 2), + levinson_durbin_recursion_(num_order), + linear_predictive_coefficients_to_cepstrum_(num_order, num_order), + is_valid_(true) { + if (num_channel <= num_order || liftering_coefficient_ <= 0 || + compression_factor_ <= 0.0 || !mel_filter_bank_analysis_.IsValid() || + !inverse_fourier_transform_.IsValid() || + !levinson_durbin_recursion_.IsValid() || + !linear_predictive_coefficients_to_cepstrum_.IsValid()) { + is_valid_ = false; + return; + } + + std::vector center_frequencies; + if (!mel_filter_bank_analysis_.GetCenterFrequencies(¢er_frequencies)) { + is_valid_ = false; + return; + } + + equal_loudness_curve_.resize(num_channel); + double* e(&(equal_loudness_curve_[0])); + for (int m(0); m < num_channel; ++m) { + const double f1(center_frequencies[m] * center_frequencies[m]); + const double f2(f1 / (f1 + 1.6e5)); + e[m] = f2 * f2 * (f1 + 1.44e6) / (f1 + 9.61e6); + } + + cepstal_weights_.resize(num_order); + double* w(&(cepstal_weights_[0])); + for (int m(0); m < num_order; ++m) { + const double theta(sptk::kPi * (m + 1) / liftering_coefficient_); + w[m] = 1.0 + 0.5 * liftering_coefficient_ * std::sin(theta); + } +} + +bool PerceptualLinearPredictiveCoefficientsAnalysis::Run( + const std::vector& power_spectrum, std::vector* plp, + double* energy, + PerceptualLinearPredictiveCoefficientsAnalysis::Buffer* buffer) const { + // Check inputs. + if (!is_valid_ || NULL == plp || NULL == buffer) { + return false; + } + + // Prepare memories. + const int num_order(GetNumOrder()); + const int num_channel(GetNumChannel()); + const int dft_length(inverse_fourier_transform_.GetLength()); + if (plp->size() != static_cast(num_order + 1)) { + plp->resize(num_order + 1); + } + if (buffer->spectrum_.size() != static_cast(num_channel)) { + buffer->spectrum_.resize(num_channel); + } + if (buffer->real_part_input_.size() != static_cast(dft_length)) { + buffer->real_part_input_.resize(dft_length); + } + if (buffer->imag_part_input_.size() != static_cast(dft_length)) { + buffer->imag_part_input_.resize(dft_length); + } + + if (!mel_filter_bank_analysis_.Run(power_spectrum, + &buffer->filter_bank_output_, energy)) { + return false; + } + + std::transform(buffer->filter_bank_output_.begin(), + buffer->filter_bank_output_.end(), + equal_loudness_curve_.begin(), buffer->spectrum_.begin(), + [this](double x, double e) { + return std::pow(std::exp(x) * e, compression_factor_); + }); + + buffer->real_part_input_[0] = buffer->spectrum_[0]; + std::copy(buffer->spectrum_.begin(), buffer->spectrum_.end(), + buffer->real_part_input_.begin() + 1); + buffer->real_part_input_[num_channel + 1] = + buffer->spectrum_[num_channel - 1]; + std::reverse_copy(buffer->spectrum_.begin(), buffer->spectrum_.end(), + buffer->real_part_input_.begin() + num_channel + 2); + + // Convert spectrum to autocorrelation. + if (!inverse_fourier_transform_.Run( + buffer->real_part_input_, buffer->imag_part_input_, + &buffer->real_part_output_, &buffer->imag_part_output_)) { + return false; + } + + // Convert autocorrelation to LPC. + bool is_stable; + buffer->real_part_output_.resize(num_order + 1); + if (!levinson_durbin_recursion_.Run( + &buffer->real_part_output_, &is_stable, + &buffer->buffer_for_levinson_durbin_recursion_)) { + return false; + } + + // Convert LPC to cepstrum. + if (!linear_predictive_coefficients_to_cepstrum_.Run( + buffer->real_part_output_, &buffer->cepstrum_)) { + return false; + } + + (*plp)[0] = buffer->cepstrum_[0] * 2.0; + + // Lifter. + std::transform(buffer->cepstrum_.begin() + 1, + buffer->cepstrum_.begin() + 1 + num_order, + cepstal_weights_.begin(), plp->begin() + 1, + [](double c, double w) { return c * w; }); + + return true; +} + +} // namespace sptk diff --git a/src/main/fbank.cc b/src/main/fbank.cc index 6216c81..bed12ae 100644 --- a/src/main/fbank.cc +++ b/src/main/fbank.cc @@ -95,7 +95,7 @@ void PrintUsage(std::ostream* stream) { * - @b -s @e double * - sampling rate in kHz @f$(0 < F_s)@f$ * - @b -L @e double - * - lowest frequency in Hz @f$(0.0 \le F_l < F_h)@f$ + * - lowest frequency in Hz @f$(0 \le F_l < F_h)@f$ * - @b -H @e double * - highest frequency in Hz @f$(F_l < F_h \le 500F_s)@f$ * - @b -q @e int diff --git a/src/main/mfcc.cc b/src/main/mfcc.cc index 1073f43..7fbd140 100644 --- a/src/main/mfcc.cc +++ b/src/main/mfcc.cc @@ -111,7 +111,7 @@ void PrintUsage(std::ostream* stream) { * - @b -s @e double * - sampling rate in kHz @f$(0 < F_s)@f$ * - @b -L @e double - * - lowest frequency in Hz @f$(0.0 \le F_l < F_h)@f$ + * - lowest frequency in Hz @f$(0 \le F_l < F_h)@f$ * - @b -H @e double * - highest frequency in Hz @f$(F_l < F_h \le 500F_s)@f$ * - @b -q @e int @@ -158,6 +158,7 @@ void PrintUsage(std::ostream* stream) { * TARGETRATE = 100000.0 * WINDOWSIZE = 250000.0 * USEHAMMING = T + * USEPOWER = F * RAWENERGY = F * ENORMALIZE = F * PREEMCOEF = 0.97 @@ -497,14 +498,14 @@ int main(int argc, char* argv[]) { : NULL, &buffer_for_mfcc_analysis)) { std::ostringstream error_message; - error_message << "Failed to run mfcc analysis"; + error_message << "Failed to run MFCC analysis"; sptk::PrintErrorMessage("mfcc", error_message); return 1; } if (!sptk::WriteStream(1, output_length, output, &std::cout, NULL)) { std::ostringstream error_message; - error_message << "Failed to write filter-bank output"; + error_message << "Failed to write MFCC"; sptk::PrintErrorMessage("mfcc", error_message); return 1; } diff --git a/src/main/plp.cc b/src/main/plp.cc new file mode 100644 index 0000000..430bae4 --- /dev/null +++ b/src/main/plp.cc @@ -0,0 +1,485 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include // std::ifstream +#include // std::setw +#include // std::cerr, std::cin, std::cout, std::endl, etc. +#include // std::ostringstream +#include // std::vector + +#include "GETOPT/ya_getopt.h" +#include "SPTK/analysis/perceptual_linear_predictive_coefficients_analysis.h" +#include "SPTK/conversion/spectrum_to_spectrum.h" +#include "SPTK/conversion/waveform_to_spectrum.h" +#include "SPTK/utils/sptk_utils.h" + +namespace { + +enum InputFormats { + kLogAmplitudeSpectrumInDecibels = 0, + kLogAmplitudeSpectrum, + kAmplitudeSpectrum, + kPowerSpectrum, + kWaveform, + kNumInputFormats +}; + +enum OutputFormats { + kPlp = 0, + kPlpAndEnergy, + kPlpAndC0, + kPlpAndC0AndEnergy, + kNumOutputFormats +}; + +const int kDefaultNumChannel(20); +const int kDefaultNumOrder(12); +const int kDefaultFftLength(256); +const int kDefaultLifteringCoefficient(22); +const double kDefaultSamplingRate(16.0); +const double kDefaultLowestFrequency(0.0); +const double kDefaultCompressionFactor(0.33); +const InputFormats kDefaultInputFormat(kWaveform); +const OutputFormats kDefaultOutputFormat(kPlp); +const double kDefaultFloor(1.0); + +void PrintUsage(std::ostream* stream) { + // clang-format off + *stream << std::endl; + *stream << " plp - perceptual linear predictive coefficients analysis" << std::endl; // NOLINT + *stream << std::endl; + *stream << " usage:" << std::endl; + *stream << " plp [ options ] [ infile ] > stdout" << std::endl; // NOLINT + *stream << " options:" << std::endl; + *stream << " -n n : number of channels ( int)[" << std::setw(5) << std::right << kDefaultNumChannel << "][ 1 <= n <= ]" << std::endl; // NOLINT + *stream << " -m m : order of cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 1 <= m < n ]" << std::endl; // NOLINT + *stream << " -l l : frame length (FFT length) ( int)[" << std::setw(5) << std::right << kDefaultFftLength << "][ 2 <= l <= ]" << std::endl; // NOLINT + *stream << " -c c : liftering coefficient ( int)[" << std::setw(5) << std::right << kDefaultLifteringCoefficient << "][ 1 <= c < ]" << std::endl; // NOLINT + *stream << " -f f : amplitude compression factor (double)[" << std::setw(5) << std::right << kDefaultCompressionFactor << "][ 0.0 < f <= ]" << std::endl; // NOLINT + *stream << " -s s : sampling rate [kHz] (double)[" << std::setw(5) << std::right << kDefaultSamplingRate << "][ 0.0 < s <= ]" << std::endl; // NOLINT + *stream << " -L L : lowest frequency [Hz] (double)[" << std::setw(5) << std::right << kDefaultLowestFrequency << "][ 0.0 <= L < H ]" << std::endl; // NOLINT + *stream << " -H H : highest frequency [Hz] (double)[" << std::setw(5) << std::right << "500*s" << "][ L < H <= 500*s ]" << std::endl; // NOLINT + *stream << " -q q : input format ( int)[" << std::setw(5) << std::right << kDefaultInputFormat << "][ 0 <= q <= 4 ]" << std::endl; // NOLINT + *stream << " 0 (20*log|X(z)|)" << std::endl; + *stream << " 1 (ln|X(z)|)" << std::endl; + *stream << " 2 (|X(z)|)" << std::endl; + *stream << " 3 (|X(z)|^2)" << std::endl; + *stream << " 4 (windowed waveform)" << std::endl; + *stream << " -o o : output format ( int)[" << std::setw(5) << std::right << kDefaultOutputFormat << "][ 0 <= o <= 3 ]" << std::endl; // NOLINT + *stream << " 0 (plp)" << std::endl; + *stream << " 1 (plp and energy)" << std::endl; + *stream << " 2 (plp and c0)" << std::endl; + *stream << " 3 (plp, c0, and energy)" << std::endl; + *stream << " -e e : floor of raw filter-bank output (double)[" << std::setw(5) << std::right << kDefaultFloor << "][ 0.0 < e <= ]" << std::endl; // NOLINT + *stream << " -h : print this message" << std::endl; + *stream << " infile:" << std::endl; + *stream << " windowed data sequence or spectrum (double)[stdin]" << std::endl; // NOLINT + *stream << " stdout:" << std::endl; + *stream << " plp (double)" << std::endl; // NOLINT + *stream << " notice:" << std::endl; + *stream << " value of l must be a power of 2" << std::endl; + *stream << std::endl; + *stream << " SPTK: version " << sptk::kVersion << std::endl; + *stream << std::endl; + // clang-format on +} + +} // namespace + +/** + * @a plp [ @e option ] [ @e infile ] + * + * - @b -n @e int + * - number of channels @f$(1 \le C)@f$ + * - @b -m @e int + * - order of coeffcients @f$(1 \le M)@f$ + * - @b -l @e int + * - FFT length @f$(2 \le N)@f$ + * - @b -c @e int + * - liftering parameter @f$(1 \le L)@f$ + * - @b -f @e double + * - compression factor @f$(0 < f)@f$ + * - @b -s @e double + * - sampling rate in kHz @f$(0 < F_s)@f$ + * - @b -L @e double + * - lowest frequency in Hz @f$(0 \le F_l < F_h)@f$ + * - @b -H @e double + * - highest frequency in Hz @f$(F_l < F_h \le 500F_s)@f$ + * - @b -q @e int + * - input format + * @arg @c 0 amplitude spectrum in dB + * @arg @c 1 log amplitude spectrum + * @arg @c 2 amplitude spectrum + * @arg @c 3 power spectrum + * @arg @c 4 windowed waveform + * - @b -o @e int + * - output format + * @arg @c 0 PLP + * @arg @c 1 PLP and energy + * @arg @c 2 PLP and C0 + * @arg @c 3 PLP, C0, and energy + * - @b -e @e double + * - floor value of raw filter-bank output @f$(0 < \epsilon)@f$ + * - @b infile @e str + * - double-type windowed sequence or spectrum + * - @b stdout + * - double-type PLP features + * + * The below example extracts the 12-th order PLP from @c data.short. The + * analysis condition is that: frame length is 10 ms, frame shift is 25 ms, + * and sampling rate is 16 kHz. A pre-emphais filter and the hamming window + * are applied to the input signal. + * + * @code{.sh} + * x2x +sd data.short | + * frame -l 400 -p 160 -n 1 | + * dfs -b 1 -0.97 | + * window -l 400 -L 512 -w 1 -n 0 | + * plp -l 512 -n 40 -c 22 -m 12 -L 64 -H 4000 -f 0.33 -o 2 > data.plp + * @endcode + * + * The corresponding HTK config file is shown as below. + * + * @code + * SOURCEFORMAT = NOHEAD + * SOURCEKIND = WAVEFORM + * SOURCERATE = 625.0 + * TARGETKIND = PLP_0 + * TARGETRATE = 100000.0 + * WINDOWSIZE = 250000.0 + * USEHAMMING = T + * USEPOWER = T + * RAWENERGY = F + * ENORMALIZE = F + * PREEMCOEF = 0.97 + * COMPRESSFACT = 0.33 + * NUMCHANS = 40 + * CEPLIFTER = 22 + * NUMCEPS = 12 + * LOFREQ = 64 + * HIFREQ = 4000 + * @endcode + * + * @param[in] argc Number of arguments. + * @param[in] argv Argument vector. + * @return 0 on success, 1 on failure. + */ +int main(int argc, char* argv[]) { + int num_channel(kDefaultNumChannel); + int num_order(kDefaultNumOrder); + int fft_length(kDefaultFftLength); + int liftering_coefficient(kDefaultLifteringCoefficient); + double compression_factor(kDefaultCompressionFactor); + double sampling_rate(kDefaultSamplingRate); + double lowest_frequency(kDefaultLowestFrequency); + double highest_frequency(0.0); + bool is_highest_frequency_specified(false); + InputFormats input_format(kDefaultInputFormat); + OutputFormats output_format(kDefaultOutputFormat); + double floor(kDefaultFloor); + + for (;;) { + const int option_char( + getopt_long(argc, argv, "n:m:l:c:f:s:L:H:q:o:e:h", NULL, NULL)); + if (-1 == option_char) break; + + switch (option_char) { + case 'n': { + if (!sptk::ConvertStringToInteger(optarg, &num_channel) || + num_channel <= 0) { + std::ostringstream error_message; + error_message + << "The argument for the -n option must be a positive integer"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'm': { + if (!sptk::ConvertStringToInteger(optarg, &num_order) || + num_order <= 0) { + std::ostringstream error_message; + error_message + << "The argument for the -m option must be a positive integer"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'l': { + if (!sptk::ConvertStringToInteger(optarg, &fft_length)) { + std::ostringstream error_message; + error_message << "The argument for the -l option must be an integer"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'c': { + if (!sptk::ConvertStringToInteger(optarg, &liftering_coefficient) || + liftering_coefficient <= 0) { + std::ostringstream error_message; + error_message + << "The argument for the -c option must be a positive integer"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'f': { + if (!sptk::ConvertStringToDouble(optarg, &compression_factor) || + compression_factor <= 0.0) { + std::ostringstream error_message; + error_message + << "The argument for the -f option must be a positive number"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 's': { + if (!sptk::ConvertStringToDouble(optarg, &sampling_rate) || + sampling_rate <= 0.0) { + std::ostringstream error_message; + error_message + << "The argument for the -s option must be a positive number"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'L': { + if (!sptk::ConvertStringToDouble(optarg, &lowest_frequency) || + lowest_frequency < 0.0) { + std::ostringstream error_message; + error_message << "The argument for the -L option must be a " + << "non-negative number"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'H': { + if (!sptk::ConvertStringToDouble(optarg, &highest_frequency) || + highest_frequency <= 0.0) { + std::ostringstream error_message; + error_message + << "The argument for the -H option must be a positive number"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + is_highest_frequency_specified = true; + break; + } + case 'q': { + const int min(0); + const int max(static_cast(kNumInputFormats) - 1); + int tmp; + if (!sptk::ConvertStringToInteger(optarg, &tmp) || + !sptk::IsInRange(tmp, min, max)) { + std::ostringstream error_message; + error_message << "The argument for the -q option must be an integer " + << "in the range of " << min << " to " << max; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + input_format = static_cast(tmp); + break; + } + case 'o': { + const int min(0); + const int max(static_cast(kNumOutputFormats) - 1); + int tmp; + if (!sptk::ConvertStringToInteger(optarg, &tmp) || + !sptk::IsInRange(tmp, min, max)) { + std::ostringstream error_message; + error_message << "The argument for the -o option must be an integer " + << "in the range of " << min << " to " << max; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + output_format = static_cast(tmp); + break; + } + case 'e': { + if (!sptk::ConvertStringToDouble(optarg, &floor) || floor <= 0.0) { + std::ostringstream error_message; + error_message + << "The argument for the -e option must be a positive number"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + break; + } + case 'h': { + PrintUsage(&std::cout); + return 0; + } + default: { + PrintUsage(&std::cerr); + return 1; + } + } + } + + const double sampling_rate_in_hz(1000.0 * sampling_rate); + if (!is_highest_frequency_specified) { + highest_frequency = 0.5 * sampling_rate_in_hz; + } else if (0.5 * sampling_rate_in_hz < highest_frequency) { + std::ostringstream error_message; + error_message + << "Highest frequency must be less than or equal to Nyquist frequency"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + if (highest_frequency <= lowest_frequency) { + std::ostringstream error_message; + error_message << "Lowest frequency must be less than highest one"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + const int num_input_files(argc - optind); + if (1 < num_input_files) { + std::ostringstream error_message; + error_message << "Too many input files"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + const char* input_file(0 == num_input_files ? NULL : argv[optind]); + + if (!sptk::SetBinaryMode()) { + std::ostringstream error_message; + error_message << "Cannot set translation mode"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + std::ifstream ifs; + if (NULL != input_file) { + ifs.open(input_file, std::ios::in | std::ios::binary); + if (ifs.fail()) { + std::ostringstream error_message; + error_message << "Cannot open file " << input_file; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + } + std::istream& input_stream(ifs.is_open() ? ifs : std::cin); + + sptk::SpectrumToSpectrum spectrum_to_spectrum( + fft_length, + static_cast(input_format), + sptk::SpectrumToSpectrum::InputOutputFormats::kPowerSpectrum); + if (kWaveform != input_format && !spectrum_to_spectrum.IsValid()) { + std::ostringstream error_message; + error_message << "Failed to set condition for input formatting"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + sptk::WaveformToSpectrum waveform_to_spectrum( + fft_length, fft_length, + sptk::SpectrumToSpectrum::InputOutputFormats::kPowerSpectrum); + sptk::WaveformToSpectrum::Buffer buffer_for_spectral_analysis; + if (kWaveform == input_format && !waveform_to_spectrum.IsValid()) { + std::ostringstream error_message; + error_message << "Failed to set condition for spectral analysis"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + sptk::PerceptualLinearPredictiveCoefficientsAnalysis analysis( + fft_length, num_channel, num_order, liftering_coefficient, + compression_factor, sampling_rate_in_hz, lowest_frequency, + highest_frequency, floor); + sptk::PerceptualLinearPredictiveCoefficientsAnalysis::Buffer + buffer_for_plp_analysis; + if (!analysis.IsValid()) { + std::ostringstream error_message; + error_message << "Failed to set condition for PLP analysis"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + const int input_length(kWaveform == input_format ? fft_length + : fft_length / 2 + 1); + const int output_length(num_order); + std::vector input(input_length); + std::vector processed_input(fft_length / 2 + 1); + std::vector output(output_length); + double energy(0.0); + + while (sptk::ReadStream(false, 0, 0, input_length, &input, &input_stream, + NULL)) { + if (kWaveform != input_format) { + if (!spectrum_to_spectrum.Run(input, &processed_input)) { + std::ostringstream error_message; + error_message << "Failed to convert spectrum"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + } else { + if (!waveform_to_spectrum.Run(input, &processed_input, + &buffer_for_spectral_analysis)) { + std::ostringstream error_message; + error_message << "Failed to transform waveform to spectrum"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + } + + if (!analysis.Run(processed_input, &output, + (kPlpAndEnergy == output_format || + kPlpAndC0AndEnergy == output_format) + ? &energy + : NULL, + &buffer_for_plp_analysis)) { + std::ostringstream error_message; + error_message << "Failed to run PLP analysis"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + if (!sptk::WriteStream(1, output_length, output, &std::cout, NULL)) { + std::ostringstream error_message; + error_message << "Failed to write PLP"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + + if (kPlpAndC0 == output_format || kPlpAndC0AndEnergy == output_format) { + if (!sptk::WriteStream(output[0], &std::cout)) { + std::ostringstream error_message; + error_message << "Failed to write c0"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + } + + if (kPlpAndEnergy == output_format || kPlpAndC0AndEnergy == output_format) { + if (!sptk::WriteStream(energy, &std::cout)) { + std::ostringstream error_message; + error_message << "Failed to write energy"; + sptk::PrintErrorMessage("plp", error_message); + return 1; + } + } + } + + return 0; +} diff --git a/src/math/discrete_fourier_transform.cc b/src/math/discrete_fourier_transform.cc index f197b3f..23eb328 100644 --- a/src/math/discrete_fourier_transform.cc +++ b/src/math/discrete_fourier_transform.cc @@ -27,6 +27,14 @@ DiscreteFourierTransform::DiscreteFourierTransform(int dft_length) is_valid_ = false; return; } + + sine_table_.resize(dft_length_); + cosine_table_.resize(dft_length_); + for (int i(0); i < dft_length_; ++i) { + const double argument(-sptk::kTwoPi * i / dft_length_); + sine_table_[i] = std::sin(argument); + cosine_table_[i] = std::cos(argument); + } } bool DiscreteFourierTransform::Run( @@ -59,11 +67,11 @@ bool DiscreteFourierTransform::Run( double sum_x(0.0); double sum_y(0.0); for (int n(0); n < dft_length_; ++n) { - const double w(-sptk::kTwoPi * (n * k) / dft_length_); - const double sin_w(std::sin(w)); - const double cos_w(std::cos(w)); - sum_x += input_x[n] * cos_w - input_y[n] * sin_w; - sum_y += input_x[n] * sin_w + input_y[n] * cos_w; + const int index(k * n % dft_length_); + sum_x += + input_x[n] * cosine_table_[index] - input_y[n] * sine_table_[index]; + sum_y += + input_x[n] * sine_table_[index] + input_y[n] * cosine_table_[index]; } output_x[k] = sum_x; output_y[k] = sum_y; diff --git a/src/math/inverse_discrete_fourier_transform.cc b/src/math/inverse_discrete_fourier_transform.cc new file mode 100644 index 0000000..03747da --- /dev/null +++ b/src/math/inverse_discrete_fourier_transform.cc @@ -0,0 +1,91 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/math/inverse_discrete_fourier_transform.h" + +#include // std::cos, std::sin +#include // std::size_t + +namespace sptk { + +InverseDiscreteFourierTransform::InverseDiscreteFourierTransform(int dft_length) + : dft_length_(dft_length), is_valid_(true) { + if (dft_length_ <= 0) { + is_valid_ = false; + return; + } + + sine_table_.resize(dft_length_); + cosine_table_.resize(dft_length_); + for (int i(0); i < dft_length_; ++i) { + const double argument(sptk::kTwoPi * i / dft_length_); + sine_table_[i] = std::sin(argument); + cosine_table_[i] = std::cos(argument); + } +} + +bool InverseDiscreteFourierTransform::Run( + const std::vector& real_part_input, + const std::vector& imag_part_input, + std::vector* real_part_output, + std::vector* imag_part_output) const { + // Check inputs + if (!is_valid_ || + real_part_input.size() != static_cast(dft_length_) || + imag_part_input.size() != static_cast(dft_length_) || + NULL == real_part_output || NULL == imag_part_output) { + return false; + } + + // Prepare memories. + if (real_part_output->size() != static_cast(dft_length_)) { + real_part_output->resize(dft_length_); + } + if (imag_part_output->size() != static_cast(dft_length_)) { + imag_part_output->resize(dft_length_); + } + + const double* input_x(&(real_part_input[0])); + const double* input_y(&(imag_part_input[0])); + double* output_x(&((*real_part_output)[0])); + double* output_y(&((*imag_part_output)[0])); + + for (int n(0); n < dft_length_; ++n) { + double sum_x(0.0); + double sum_y(0.0); + for (int k(0); k < dft_length_; ++k) { + const int index(k * n % dft_length_); + sum_x += + input_x[k] * cosine_table_[index] + input_y[k] * sine_table_[index]; + sum_y += + input_x[k] * sine_table_[index] - input_y[k] * cosine_table_[index]; + } + output_x[n] = sum_x / dft_length_; + output_y[n] = sum_y / dft_length_; + } + + return true; +} + +bool InverseDiscreteFourierTransform::Run( + std::vector* real_part, std::vector* imag_part) const { + if (NULL == real_part || NULL == imag_part) return false; + std::vector real_part_input(*real_part); + std::vector imag_part_input(*imag_part); + return Run(real_part_input, imag_part_input, real_part, imag_part); +} + +} // namespace sptk diff --git a/src/math/inverse_fourier_transform.cc b/src/math/inverse_fourier_transform.cc new file mode 100644 index 0000000..ed35763 --- /dev/null +++ b/src/math/inverse_fourier_transform.cc @@ -0,0 +1,112 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/math/inverse_fourier_transform.h" + +#include "SPTK/math/inverse_discrete_fourier_transform.h" +#include "SPTK/math/inverse_fast_fourier_transform.h" + +namespace { + +class InverseFastFourierTransformWrapper + : public sptk::FourierTransform::FourierTransformInterface { + public: + explicit InverseFastFourierTransformWrapper(int fft_length) + : inverse_fast_fourier_tranform_(fft_length) { + } + + virtual ~InverseFastFourierTransformWrapper() { + } + + virtual int GetLength() const { + return inverse_fast_fourier_tranform_.GetFftLength(); + } + + virtual bool IsValid() const { + return inverse_fast_fourier_tranform_.IsValid(); + } + + virtual bool Run(const std::vector& real_part_input, + const std::vector& imag_part_input, + std::vector* real_part_output, + std::vector* imag_part_output) const { + return inverse_fast_fourier_tranform_.Run( + real_part_input, imag_part_input, real_part_output, imag_part_output); + } + + virtual bool Run(std::vector* real_part, + std::vector* imag_part) const { + return inverse_fast_fourier_tranform_.Run(real_part, imag_part); + } + + private: + const sptk::InverseFastFourierTransform inverse_fast_fourier_tranform_; + + DISALLOW_COPY_AND_ASSIGN(InverseFastFourierTransformWrapper); +}; + +class InverseDiscreteFourierTransformWrapper + : public sptk::FourierTransform::FourierTransformInterface { + public: + explicit InverseDiscreteFourierTransformWrapper(int dft_length) + : inverse_discrete_fourier_tranform_(dft_length) { + } + + virtual ~InverseDiscreteFourierTransformWrapper() { + } + + virtual int GetLength() const { + return inverse_discrete_fourier_tranform_.GetDftLength(); + } + + virtual bool IsValid() const { + return inverse_discrete_fourier_tranform_.IsValid(); + } + + virtual bool Run(const std::vector& real_part_input, + const std::vector& imag_part_input, + std::vector* real_part_output, + std::vector* imag_part_output) const { + return inverse_discrete_fourier_tranform_.Run( + real_part_input, imag_part_input, real_part_output, imag_part_output); + } + + virtual bool Run(std::vector* real_part, + std::vector* imag_part) const { + return inverse_discrete_fourier_tranform_.Run(real_part, imag_part); + } + + private: + const sptk::InverseDiscreteFourierTransform + inverse_discrete_fourier_tranform_; + + DISALLOW_COPY_AND_ASSIGN(InverseDiscreteFourierTransformWrapper); +}; + +} // namespace + +namespace sptk { + +InverseFourierTransform::InverseFourierTransform(int length) { + if (sptk::IsPowerOfTwo(length)) { + inverse_fourier_transform_ = new InverseFastFourierTransformWrapper(length); + } else { + inverse_fourier_transform_ = + new InverseDiscreteFourierTransformWrapper(length); + } +} + +} // namespace sptk diff --git a/test/test_plp.bats b/test/test_plp.bats new file mode 100755 index 0000000..ada4299 --- /dev/null +++ b/test/test_plp.bats @@ -0,0 +1,34 @@ +#!/usr/bin/env bats +# ------------------------------------------------------------------------ # +# Copyright 2021 SPTK Working Group # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +# ------------------------------------------------------------------------ # + +sptk3=tools/sptk/bin +sptk4=bin +tmp=test_plp + +setup() { + mkdir -p $tmp +} + +teardown() { + rm -rf $tmp +} + +@test "plp: valgrind" { + $sptk3/nrand -l 16 > $tmp/1 + run valgrind $sptk4/plp -l 8 -n 4 -m 3 $tmp/1 + [ "$(echo "${lines[-1]}" | sed -r 's/.*SUMMARY: ([0-9]*) .*/\1/')" -eq 0 ] +}