diff --git a/CMakeLists.txt b/CMakeLists.txt index 123309a..960a2cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,13 +58,18 @@ set(CC_SOURCES ${THIRD_PARTY_DIR}/SWIPE/vector.cc ${THIRD_PARTY_DIR}/Snack/jkGetF0.cc ${THIRD_PARTY_DIR}/Snack/sigproc.cc + ${THIRD_PARTY_DIR}/WORLD/aperiodicity.cc ${THIRD_PARTY_DIR}/WORLD/common.cc + ${THIRD_PARTY_DIR}/WORLD/d4c.cc ${THIRD_PARTY_DIR}/WORLD/dio.cc ${THIRD_PARTY_DIR}/WORLD/fft_world.cc ${THIRD_PARTY_DIR}/WORLD/matlabfunctions.cc ${SOURCE_DIR}/analysis/adaptive_generalized_cepstral_analysis.cc ${SOURCE_DIR}/analysis/adaptive_mel_cepstral_analysis.cc ${SOURCE_DIR}/analysis/adaptive_mel_generalized_cepstral_analysis.cc + ${SOURCE_DIR}/analysis/aperiodicity_extraction.cc + ${SOURCE_DIR}/analysis/aperiodicity_extraction_by_tandem.cc + ${SOURCE_DIR}/analysis/aperiodicity_extraction_by_world.cc ${SOURCE_DIR}/analysis/autocorrelation_analysis.cc ${SOURCE_DIR}/analysis/fast_fourier_transform_cepstral_analysis.cc ${SOURCE_DIR}/analysis/mel_cepstral_analysis.cc @@ -213,6 +218,7 @@ set(MAIN_SOURCES ${SOURCE_DIR}/main/acr2csm.cc ${SOURCE_DIR}/main/aeq.cc ${SOURCE_DIR}/main/amgcep.cc + ${SOURCE_DIR}/main/ap.cc ${SOURCE_DIR}/main/average.cc ${SOURCE_DIR}/main/b2mc.cc ${SOURCE_DIR}/main/bcp.cc diff --git a/README.md b/README.md index 2c286c9..0567caf 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ Changes from SPTK3 - No memory leaks - Thread-safe - New features: + - Aperiodicity extraction (`ap`) - Conversion from/to log area ratio (`lar2par` and `par2lar`) - Dynamic range compression (`drc`) - Entropy calculation (`entropy`) diff --git a/doc/main/ap.rst b/doc/main/ap.rst new file mode 100644 index 0000000..a41c5d2 --- /dev/null +++ b/doc/main/ap.rst @@ -0,0 +1,9 @@ +.. _ap: + +ap +== + +.. doxygenfile:: ap.cc + +.. doxygenclass:: sptk::AperiodicityExtraction + :members: diff --git a/include/SPTK/analysis/aperiodicity_extraction.h b/include/SPTK/analysis/aperiodicity_extraction.h new file mode 100644 index 0000000..2cd3833 --- /dev/null +++ b/include/SPTK/analysis/aperiodicity_extraction.h @@ -0,0 +1,87 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_ANALYSIS_APERIODICITY_EXTRACTION_H_ +#define SPTK_ANALYSIS_APERIODICITY_EXTRACTION_H_ + +#include // std::vector + +#include "SPTK/analysis/aperiodicity_extraction_interface.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Extract aperiodicity from waveform. + * + * The input is whole audio waveform and the output is the sequence of the + * aperiodicity measure. The implemented algorithms of the extraction are + * TANDEM-STRAIGHT and D4C. + * + * [1] H. Kawahara et al., "Simplification and extension of non-periodic + * excitation source representations for high-quality speech manipulation + * systems," Proc. of Interspeech, pp. 38-41, 2010. + * + * [2] M. Morise, "D4C, a band-aperiodicity estimator for high-quality + * speech synthesis," Proc. of Speech Communication, vol. 84, + * pp. 57-65, 2016. + */ +class AperiodicityExtraction { + public: + /** + * Aperiodicity extraction algorithms. + */ + enum Algorithms { kTandem = 0, kWorld, kNumAlgorithms }; + + /** + * @param[in] fft_length FFT length. + * @param[in] frame_shift Frame shift in point. + * @param[in] sampling_rate Sampling rate in Hz. + * @param[in] algorithm Algorithm used for aperiodicity extraction. + */ + AperiodicityExtraction(int fft_length, int frame_shift, double sampling_rate, + Algorithms algorithm); + + virtual ~AperiodicityExtraction() { + delete aperiodicity_extraction_; + } + + /** + * @return True if this object is valid. + */ + bool IsValid() const { + return (NULL != aperiodicity_extraction_ && + aperiodicity_extraction_->IsValid()); + } + + /** + * @param[in] waveform Waveform. + * @param[in] f0 Fundamental frequency in Hz. + * @param[out] aperiodicity Aperiodicity measure. + * @return True on success, false on failure. + */ + bool Run(const std::vector& waveform, const std::vector& f0, + std::vector >* aperiodicity) const; + + private: + AperiodicityExtractionInterface* aperiodicity_extraction_; + + DISALLOW_COPY_AND_ASSIGN(AperiodicityExtraction); +}; + +} // namespace sptk + +#endif // SPTK_ANALYSIS_APERIODICITY_EXTRACTION_H_ diff --git a/include/SPTK/analysis/aperiodicity_extraction_by_tandem.h b/include/SPTK/analysis/aperiodicity_extraction_by_tandem.h new file mode 100644 index 0000000..444f37f --- /dev/null +++ b/include/SPTK/analysis/aperiodicity_extraction_by_tandem.h @@ -0,0 +1,93 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_ANALYSIS_APERIODICITY_EXTRACTION_BY_TANDEM_H_ +#define SPTK_ANALYSIS_APERIODICITY_EXTRACTION_BY_TANDEM_H_ + +#include // std::vector + +#include "SPTK/analysis/aperiodicity_extraction_interface.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Extract aperiodicity based on TANDEM-STRAIGHT. + */ +class AperiodicityExtractionByTandem : public AperiodicityExtractionInterface { + public: + /** + * @param[in] fft_length FFT length. + * @param[in] frame_shift Frame shift in point. + * @param[in] sampling_rate Sampling rate in Hz. + */ + AperiodicityExtractionByTandem(int fft_length, int frame_shift, + double sampling_rate); + + virtual ~AperiodicityExtractionByTandem() { + } + + /** + * @return FFT length. + */ + int GetFftLength() const { + return fft_length_; + } + + /** + * @return Frame shift. + */ + virtual int GetFrameShift() const { + return frame_shift_; + } + + /** + * @return Sampling rate. + */ + double GetSamplingRate() const { + return sampling_rate_; + } + + /** + * @return True if this object is valid. + */ + virtual bool IsValid() const { + return is_valid_; + } + + /** + * @param[in] waveform Waveform. + * @param[in] f0 Fundamental frequency in Hz. + * @param[out] aperiodicity Aperiodicity measure. + * @return True on success, false on failure. + */ + virtual bool Run(const std::vector& waveform, + const std::vector& f0, + std::vector >* aperiodicity) const; + + private: + const int fft_length_; + const int frame_shift_; + const double sampling_rate_; + + bool is_valid_; + + DISALLOW_COPY_AND_ASSIGN(AperiodicityExtractionByTandem); +}; + +} // namespace sptk + +#endif // SPTK_ANALYSIS_APERIODICITY_EXTRACTION_BY_TANDEM_H_ diff --git a/include/SPTK/analysis/aperiodicity_extraction_by_world.h b/include/SPTK/analysis/aperiodicity_extraction_by_world.h new file mode 100644 index 0000000..90635bc --- /dev/null +++ b/include/SPTK/analysis/aperiodicity_extraction_by_world.h @@ -0,0 +1,93 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_ANALYSIS_APERIODICITY_EXTRACTION_BY_WORLD_H_ +#define SPTK_ANALYSIS_APERIODICITY_EXTRACTION_BY_WORLD_H_ + +#include // std::vector + +#include "SPTK/analysis/aperiodicity_extraction_interface.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Extract aperiodicity based on WORLD (D4C). + */ +class AperiodicityExtractionByWorld : public AperiodicityExtractionInterface { + public: + /** + * @param[in] fft_length FFT length. + * @param[in] frame_shift Frame shift in point. + * @param[in] sampling_rate Sampling rate in Hz. + */ + AperiodicityExtractionByWorld(int fft_length, int frame_shift, + double sampling_rate); + + virtual ~AperiodicityExtractionByWorld() { + } + + /** + * @return FFT length. + */ + int GetFftLength() const { + return fft_length_; + } + + /** + * @return Frame shift. + */ + virtual int GetFrameShift() const { + return frame_shift_; + } + + /** + * @return Sampling rate. + */ + double GetSamplingRate() const { + return sampling_rate_; + } + + /** + * @return True if this object is valid. + */ + virtual bool IsValid() const { + return is_valid_; + } + + /** + * @param[in] waveform Waveform. + * @param[in] f0 Fundamental frequency in Hz. + * @param[out] aperiodicity Aperiodicity measure. + * @return True on success, false on failure. + */ + virtual bool Run(const std::vector& waveform, + const std::vector& f0, + std::vector >* aperiodicity) const; + + private: + const int fft_length_; + const int frame_shift_; + const double sampling_rate_; + + bool is_valid_; + + DISALLOW_COPY_AND_ASSIGN(AperiodicityExtractionByWorld); +}; + +} // namespace sptk + +#endif // SPTK_ANALYSIS_APERIODICITY_EXTRACTION_BY_WORLD_H_ diff --git a/include/SPTK/analysis/aperiodicity_extraction_interface.h b/include/SPTK/analysis/aperiodicity_extraction_interface.h new file mode 100644 index 0000000..e7c452b --- /dev/null +++ b/include/SPTK/analysis/aperiodicity_extraction_interface.h @@ -0,0 +1,55 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_ANALYSIS_APERIODICITY_EXTRACTION_INTERFACE_H_ +#define SPTK_ANALYSIS_APERIODICITY_EXTRACTION_INTERFACE_H_ + +#include // std::vector + +namespace sptk { + +/** + * An interface of aperiodicity extraction. + */ +class AperiodicityExtractionInterface { + public: + virtual ~AperiodicityExtractionInterface() { + } + + /** + * @return Frame shift. + */ + virtual int GetFrameShift() const = 0; + + /** + * @return True if this object is valid. + */ + virtual bool IsValid() const = 0; + + /** + * @param[in] waveform Waveform. + * @param[in] f0 Fundamental frequency in Hz. + * @param[out] aperiodicity Aperiodicity measure. + * @return True on success, false on failure. + */ + virtual bool Run(const std::vector& waveform, + const std::vector& f0, + std::vector >* aperiodicity) const = 0; +}; + +} // namespace sptk + +#endif // SPTK_ANALYSIS_APERIODICITY_EXTRACTION_INTERFACE_H_ diff --git a/include/SPTK/analysis/pitch_extraction.h b/include/SPTK/analysis/pitch_extraction.h index ffa2a58..fa1f35c 100644 --- a/include/SPTK/analysis/pitch_extraction.h +++ b/include/SPTK/analysis/pitch_extraction.h @@ -42,13 +42,13 @@ namespace sptk { * * [4] M. Morise, H. Kawahara and H. Katayose, "Fast and reliable F0 * estimation method based on the period extraction of vocal fold vibration - * of singing voice and speech, Proc. of AES 35th International Conference, - * 2009. + * of singing voice and speech," Proc. of AES 35th International + * Conference, 2009. */ class PitchExtraction { public: /** - * Pitch extraction algorithm type. + * Pitch extraction algorithms. */ enum Algorithms { kRapt = 0, kSwipe, kReaper, kWorld, kNumAlgorithms }; @@ -84,10 +84,7 @@ class PitchExtraction { */ bool Run(const std::vector& waveform, std::vector* f0, std::vector* epochs, - PitchExtractionInterface::Polarity* polarity) const { - return (NULL != pitch_extraction_ && - pitch_extraction_->Get(waveform, f0, epochs, polarity)); - } + PitchExtractionInterface::Polarity* polarity) const; private: PitchExtractionInterface* pitch_extraction_; diff --git a/src/analysis/aperiodicity_extraction.cc b/src/analysis/aperiodicity_extraction.cc new file mode 100644 index 0000000..91d39c1 --- /dev/null +++ b/src/analysis/aperiodicity_extraction.cc @@ -0,0 +1,74 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/analysis/aperiodicity_extraction.h" + +#include // std::copy, std::fill, std::min +#include // std::ceil + +#include "SPTK/analysis/aperiodicity_extraction_by_tandem.h" +#include "SPTK/analysis/aperiodicity_extraction_by_world.h" + +namespace sptk { + +AperiodicityExtraction::AperiodicityExtraction( + int fft_length, int frame_shift, double sampling_rate, + AperiodicityExtraction::Algorithms algorithm) { + switch (algorithm) { + case kTandem: { + aperiodicity_extraction_ = new AperiodicityExtractionByTandem( + fft_length, frame_shift, sampling_rate); + break; + } + case kWorld: { + aperiodicity_extraction_ = new AperiodicityExtractionByWorld( + fft_length, frame_shift, sampling_rate); + break; + } + default: { + aperiodicity_extraction_ = NULL; + break; + } + } +} + +bool AperiodicityExtraction::Run( + const std::vector& waveform, const std::vector& f0, + std::vector >* aperiodicity) const { + if (!IsValid()) { + return false; + } + + const int target_f0_length( + static_cast(std::ceil(static_cast(waveform.size()) / + aperiodicity_extraction_->GetFrameShift()))); + const int given_f0_length(static_cast(f0.size())); + if (target_f0_length == given_f0_length) { + return aperiodicity_extraction_->Run(waveform, f0, aperiodicity); + } + + std::vector length_fixed_f0(target_f0_length); + std::copy(f0.begin(), + f0.begin() + std::min(target_f0_length, given_f0_length), + length_fixed_f0.begin()); + if (1 <= given_f0_length && given_f0_length < target_f0_length) { + std::fill(length_fixed_f0.begin() + given_f0_length, length_fixed_f0.end(), + f0.back()); + } + return aperiodicity_extraction_->Run(waveform, length_fixed_f0, aperiodicity); +} + +} // namespace sptk diff --git a/src/analysis/aperiodicity_extraction_by_tandem.cc b/src/analysis/aperiodicity_extraction_by_tandem.cc new file mode 100644 index 0000000..8204663 --- /dev/null +++ b/src/analysis/aperiodicity_extraction_by_tandem.cc @@ -0,0 +1,79 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/analysis/aperiodicity_extraction_by_tandem.h" + +#include // std::size_t + +#include "WORLD/world/aperiodicity.h" + +namespace sptk { + +AperiodicityExtractionByTandem::AperiodicityExtractionByTandem( + int fft_length, int frame_shift, double sampling_rate) + : fft_length_(fft_length), + frame_shift_(frame_shift), + sampling_rate_(sampling_rate), + is_valid_(true) { + if (fft_length <= 0 || frame_shift_ <= 0 || + !sptk::IsInRange(sampling_rate_, 8000.0, 98000.0)) { + is_valid_ = false; + return; + } +} + +bool AperiodicityExtractionByTandem::Run( + const std::vector& waveform, const std::vector& f0, + std::vector >* aperiodicity) const { + // Check inputs. + if (!is_valid_ || waveform.empty() || f0.empty() || NULL == aperiodicity) { + return false; + } + + const int f0_length(f0.size()); + const double frame_shift_in_sec(frame_shift_ / sampling_rate_); + std::vector time_axis(f0_length); + for (int i(0); i < f0_length; ++i) { + time_axis[i] = i * frame_shift_in_sec; + } + + // Prepare memories. + if (aperiodicity->size() != static_cast(f0_length)) { + aperiodicity->resize(f0_length); + } + const int length(fft_length_ / 2 + 1); + for (int i(0); i < f0_length; ++i) { + if ((*aperiodicity)[i].size() != static_cast(length)) { + (*aperiodicity)[i].resize(length); + } + } + + std::vector pointers; + pointers.reserve(f0_length); + for (std::vector& vector : *aperiodicity) { + pointers.push_back(vector.data()); + } + + world::AperiodicityRatio( + const_cast(waveform.data()), static_cast(waveform.size()), + static_cast(sampling_rate_), const_cast(f0.data()), + f0_length, const_cast(time_axis.data()), fft_length_, + pointers.data()); + + return true; +} + +} // namespace sptk diff --git a/src/analysis/aperiodicity_extraction_by_world.cc b/src/analysis/aperiodicity_extraction_by_world.cc new file mode 100644 index 0000000..e02f24c --- /dev/null +++ b/src/analysis/aperiodicity_extraction_by_world.cc @@ -0,0 +1,81 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/analysis/aperiodicity_extraction_by_world.h" + +#include // std::size_t + +#include "WORLD/world/d4c.h" + +namespace sptk { + +AperiodicityExtractionByWorld::AperiodicityExtractionByWorld( + int fft_length, int frame_shift, double sampling_rate) + : fft_length_(fft_length), + frame_shift_(frame_shift), + sampling_rate_(sampling_rate), + is_valid_(true) { + if (fft_length <= 0 || frame_shift_ <= 0 || + !sptk::IsInRange(sampling_rate_, 8000.0, 98000.0)) { + is_valid_ = false; + return; + } +} + +bool AperiodicityExtractionByWorld::Run( + const std::vector& waveform, const std::vector& f0, + std::vector >* aperiodicity) const { + // Check inputs. + if (!is_valid_ || waveform.empty() || f0.empty() || NULL == aperiodicity) { + return false; + } + + world::D4COption option; + world::InitializeD4COption(&option); + option.threshold = 0.0; + + const int f0_length(f0.size()); + const double frame_shift_in_sec(frame_shift_ / sampling_rate_); + std::vector time_axis(f0_length); + for (int i(0); i < f0_length; ++i) { + time_axis[i] = i * frame_shift_in_sec; + } + + // Prepare memories. + if (aperiodicity->size() != static_cast(f0_length)) { + aperiodicity->resize(f0_length); + } + const int length(fft_length_ / 2 + 1); + for (int i(0); i < f0_length; ++i) { + if ((*aperiodicity)[i].size() != static_cast(length)) { + (*aperiodicity)[i].resize(length); + } + } + + std::vector pointers; + pointers.reserve(f0_length); + for (std::vector& vector : *aperiodicity) { + pointers.push_back(vector.data()); + } + + world::D4C(waveform.data(), static_cast(waveform.size()), + static_cast(sampling_rate_), time_axis.data(), f0.data(), + f0_length, fft_length_, &option, pointers.data()); + + return true; +} + +} // namespace sptk diff --git a/src/analysis/pitch_extraction.cc b/src/analysis/pitch_extraction.cc index f0a1d2e..1e3fcf2 100644 --- a/src/analysis/pitch_extraction.cc +++ b/src/analysis/pitch_extraction.cc @@ -55,4 +55,14 @@ PitchExtraction::PitchExtraction(int frame_shift, double sampling_rate, } } +bool PitchExtraction::Run(const std::vector& waveform, + std::vector* f0, std::vector* epochs, + PitchExtractionInterface::Polarity* polarity) const { + if (!IsValid()) { + return false; + } + + return pitch_extraction_->Get(waveform, f0, epochs, polarity); +} + } // namespace sptk diff --git a/src/analysis/pitch_extraction_by_reaper.cc b/src/analysis/pitch_extraction_by_reaper.cc index c9e51e8..32c9076 100644 --- a/src/analysis/pitch_extraction_by_reaper.cc +++ b/src/analysis/pitch_extraction_by_reaper.cc @@ -55,10 +55,11 @@ bool PitchExtractionByReaper::Get( reaper::EpochTracker epoch_tracker; epoch_tracker.set_unvoiced_cost(static_cast(voicing_threshold_)); std::vector integer_waveform(waveform.begin(), waveform.end()); - if (!epoch_tracker.Init( - &(integer_waveform[0]), static_cast(waveform.size()), - static_cast(sampling_rate_), static_cast(lower_f0_), - static_cast(upper_f0_), true, false)) { + if (!epoch_tracker.Init(integer_waveform.data(), + static_cast(integer_waveform.size()), + static_cast(sampling_rate_), + static_cast(lower_f0_), + static_cast(upper_f0_), true, false)) { return false; } diff --git a/src/analysis/pitch_extraction_by_world.cc b/src/analysis/pitch_extraction_by_world.cc index 2b69ae4..fbff8d1 100644 --- a/src/analysis/pitch_extraction_by_world.cc +++ b/src/analysis/pitch_extraction_by_world.cc @@ -65,9 +65,9 @@ bool PitchExtractionByWorld::Get( frame_period)); std::vector time_axis(tmp_length); std::vector tmp_f0(tmp_length); - world::Dio(&(waveform[0]), static_cast(waveform.size()), - static_cast(sampling_rate_), &option, &(time_axis[0]), - &(tmp_f0[0])); + world::Dio(waveform.data(), static_cast(waveform.size()), + static_cast(sampling_rate_), &option, time_axis.data(), + tmp_f0.data()); const int target_length(static_cast( std::ceil(static_cast(waveform.size()) / frame_shift_))); diff --git a/src/main/ap.cc b/src/main/ap.cc new file mode 100644 index 0000000..345b140 --- /dev/null +++ b/src/main/ap.cc @@ -0,0 +1,420 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include // std::max, std::min, std::transform +#include // std::exp +#include // std::ifstream +#include // std::setw +#include // std::cerr, std::cin, std::cout, std::endl, etc. +#include // std::ostringstream +#include // std::vector + +#include "Getopt/getoptwin.h" +#include "SPTK/analysis/aperiodicity_extraction.h" +#include "SPTK/utils/sptk_utils.h" + +namespace { + +enum InputFormats { kPitch = 0, kF0, kLogF0, kNumInputFormats }; + +enum OutputFormats { + kAperiodicity = 0, + kPeriodicity, + kAperiodicityOverPeriodicity, + kPeriodicityOverAperiodicity, + kNumOutputFormats +}; + +const sptk::AperiodicityExtraction::Algorithms kDefaultAlgorithm( + sptk::AperiodicityExtraction::Algorithms::kTandem); +const int kDefaultFftLength(256); +const int kDefaultFrameShift(80); +const double kDefaultSamplingRate(16.0); +const double kDefaultLowerBound(1e-3); +const double kDefaultUpperBound(1.0 - kDefaultLowerBound); +const InputFormats kDefaultInputFormat(kPitch); +const OutputFormats kDefaultOutputFormat(kAperiodicity); +const double kDefaultF0(150.0); + +void PrintUsage(std::ostream* stream) { + // clang-format off + *stream << std::endl; + *stream << " ap - aperiodicity extraction" << std::endl; + *stream << std::endl; + *stream << " usage:" << std::endl; + *stream << " ap [ options ] f0file [ infile ] > stdout" << std::endl; + *stream << " options:" << std::endl; + *stream << " -a a : algorithm used for ( int)[" << std::setw(5) << std::right << kDefaultAlgorithm << "][ 0 <= a <= 1 ]" << std::endl; // NOLINT + *stream << " aperiodicity estimation" << std::endl; + *stream << " 0 (TANDEM-STRAIGHT)" << std::endl; + *stream << " 1 (WORLD)" << std::endl; + *stream << " -l l : FFT length ( int)[" << std::setw(5) << std::right << kDefaultFftLength << "][ 1 <= l <= ]" << std::endl; // NOLINT + *stream << " -p p : frame shift [point] ( int)[" << std::setw(5) << std::right << kDefaultFrameShift << "][ 1 <= p <= ]" << std::endl; // NOLINT + *stream << " -s s : sampling rate [kHz] (double)[" << std::setw(5) << std::right << kDefaultSamplingRate << "][ 8.0 <= s <= 98.0 ]" << std::endl; // NOLINT + *stream << " -L L : lower bound of Ha (double)[" << std::setw(5) << std::right << kDefaultLowerBound << "][ 0.0 <= L < H ]" << std::endl; // NOLINT + *stream << " -H H : upper bound of Ha (double)[" << std::setw(5) << std::right << kDefaultUpperBound << "][ L < H <= 1.0 ]" << std::endl; // NOLINT + *stream << " -q q : f0 input format ( int)[" << std::setw(5) << std::right << kDefaultInputFormat << "][ 0 <= q <= 2 ]" << std::endl; // NOLINT + *stream << " 0 (Fs/F0)" << std::endl; + *stream << " 1 (F0)" << std::endl; + *stream << " 2 (log F0)" << std::endl; + *stream << " -o o : output format ( int)[" << std::setw(5) << std::right << kDefaultOutputFormat << "][ 0 <= o <= 3 ]" << std::endl; // NOLINT + *stream << " 0 (Ha)" << std::endl; + *stream << " 1 (Hp)" << std::endl; + *stream << " 2 (Ha/Hp)" << std::endl; + *stream << " 3 (Hp/Ha)" << std::endl; + *stream << " -h : print this message" << std::endl; + *stream << " infile:" << std::endl; + *stream << " waveform (double)[stdin]" << std::endl; // NOLINT + *stream << " f0file:" << std::endl; + *stream << " pitch (double)" << std::endl; + *stream << " stdout:" << std::endl; + *stream << " aperiodicity (double)" << std::endl; + *stream << " notice:" << std::endl; + *stream << " magic number representing unvoiced symbol is 0 (q = 0, 1) or -1e+10 (q = 2)" << std::endl; // NOLINT + *stream << std::endl; + *stream << " SPTK: version " << sptk::kVersion << std::endl; + *stream << std::endl; + // clang-format on +} + +} // namespace + +/** + * @a ap [ @e option ] @e f0file [ @e infile ] + * + * - @b -a @e int + * - algorithm used for aperiodicity extraction + * @arg @c 0 TANDEM-STRAIGHT + * @arg @c 1 WORLD (D4C) + * - @b -l @e int + * - FFT length + * - @b -p @e int + * - frame shift [point] @f$(1 \le P)@f$ + * - @b -s @e double + * - sampling rate [kHz] @f$(8 \le F_s \le 98)@f$ + * - @b -L @e double + * - lower bound of aperiodicity @f$(0 \le L < H)@f$ + * - @b -H @e double + * - upper bound of aperiodicity @f$(L < H \le 1)@f$ + * - @b -q @e int + * - f0 input format + * @arg @c 0 pitch @f$(F_s / F_0)@f$ + * @arg @c 1 F0 + * @arg @c 2 log F0 + * - @b -o @e int + * - output format + * @arg @c 0 Ha + * @arg @c 1 Hp + * @arg @c 2 Ha/Hp + * @arg @c 3 Hp/Ha + * - @b infile @e str + * - double-type waveform + * - @b f0file @e str + * - double-type pitch + * - @b stdout + * - double-type aperiodicity + * + * The below is a simple example to extract aperiodicity from @c data.d + * + * @code{.sh} + * pitch -s 16 -p 80 -L 80 -H 200 -o 0 < data.d > data.f0 + * ap -s 16 -p 80 -q 0 data.f0 < data.d > data.ap + * @endcode + * + * @param[in] argc Number of arguments. + * @param[in] argv Argument vector. + * @return 0 on success, 1 on failure. + */ +int main(int argc, char* argv[]) { + sptk::AperiodicityExtraction::Algorithms algorithm(kDefaultAlgorithm); + int fft_length(kDefaultFftLength); + int frame_shift(kDefaultFrameShift); + double sampling_rate(kDefaultSamplingRate); + double lower_bound(kDefaultLowerBound); + double upper_bound(kDefaultUpperBound); + InputFormats input_format(kDefaultInputFormat); + OutputFormats output_format(kDefaultOutputFormat); + + for (;;) { + const int option_char( + getopt_long_only(argc, argv, "a:l:p:s:L:H:q:o:h", NULL, NULL)); + if (-1 == option_char) break; + + switch (option_char) { + case 'a': { + const int min(0); + const int max( + static_cast( + sptk::AperiodicityExtraction::Algorithms::kNumAlgorithms) - + 1); + int tmp; + if (!sptk::ConvertStringToInteger(optarg, &tmp) || + !sptk::IsInRange(tmp, min, max)) { + std::ostringstream error_message; + error_message << "The argument for the -a option must be an integer " + << "in the range of " << min << " to " << max; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + algorithm = static_cast(tmp); + break; + } + case 'l': { + if (!sptk::ConvertStringToInteger(optarg, &fft_length) || + fft_length <= 0) { + std::ostringstream error_message; + error_message + << "The argument for the -l option must be a positive integer"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + break; + } + case 'p': { + if (!sptk::ConvertStringToInteger(optarg, &frame_shift) || + frame_shift <= 0) { + std::ostringstream error_message; + error_message + << "The argument for the -p option must be a positive integer"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + break; + } + case 's': { + const double min(8.0); + const double max(98.0); + if (!sptk::ConvertStringToDouble(optarg, &sampling_rate) || + !sptk::IsInRange(sampling_rate, min, max)) { + std::ostringstream error_message; + error_message << "The argument for the -s option must be in a number " + << "in the interval [" << min << ", " << max << "]"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + break; + } + case 'L': { + if (!sptk::ConvertStringToDouble(optarg, &lower_bound) || + !sptk::IsInRange(lower_bound, 0.0, 1.0)) { + std::ostringstream error_message; + error_message + << "The argument for the -L option must be in [0.0, 1.0]"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + break; + } + case 'H': { + if (!sptk::ConvertStringToDouble(optarg, &upper_bound) || + !sptk::IsInRange(upper_bound, 0.0, 1.0)) { + std::ostringstream error_message; + error_message + << "The argument for the -H option must be in [0.0, 1.0]"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + break; + } + case 'q': { + const int min(0); + const int max(static_cast(kNumInputFormats) - 1); + int tmp; + if (!sptk::ConvertStringToInteger(optarg, &tmp) || + !sptk::IsInRange(tmp, min, max)) { + std::ostringstream error_message; + error_message << "The argument for the -q option must be an integer " + << "in the range of " << min << " to " << max; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + input_format = static_cast(tmp); + break; + } + case 'o': { + const int min(0); + const int max(static_cast(kNumOutputFormats) - 1); + int tmp; + if (!sptk::ConvertStringToInteger(optarg, &tmp) || + !sptk::IsInRange(tmp, min, max)) { + std::ostringstream error_message; + error_message << "The argument for the -o option must be an integer " + << "in the range of " << min << " to " << max; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + output_format = static_cast(tmp); + break; + } + case 'h': { + PrintUsage(&std::cout); + return 0; + } + default: { + PrintUsage(&std::cerr); + return 1; + } + } + } + + if (upper_bound <= lower_bound) { + std::ostringstream error_message; + error_message << "Lower bound must be less than upper one"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + + const char* f0_file; + const char* raw_file; + const int num_input_files(argc - optind); + if (2 == num_input_files) { + f0_file = argv[argc - 2]; + raw_file = argv[argc - 1]; + } else if (1 == num_input_files) { + f0_file = argv[argc - 1]; + raw_file = NULL; + } else { + std::ostringstream error_message; + error_message << "Just two input files, f0file and infile, are required"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + + const double sampling_rate_in_hz(1000.0 * sampling_rate); + + std::vector f0; + { + std::ifstream ifs; + ifs.open(f0_file, std::ios::in | std::ios::binary); + if (ifs.fail()) { + std::ostringstream error_message; + error_message << "Cannot open file " << f0_file; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + std::istream& input_stream(ifs); + + double tmp; + while (sptk::ReadStream(&tmp, &input_stream)) { + f0.push_back(tmp); + } + if (f0.empty()) return 0; + + switch (input_format) { + case kPitch: { + std::transform( + f0.begin(), f0.end(), f0.begin(), [sampling_rate_in_hz](double x) { + return (0.0 == x) ? kDefaultF0 : sampling_rate_in_hz / x; + }); + break; + } + case kF0: { + std::transform(f0.begin(), f0.end(), f0.begin(), + [](double x) { return (0.0 == x) ? kDefaultF0 : x; }); + break; + } + case kLogF0: { + std::transform(f0.begin(), f0.end(), f0.begin(), [](double x) { + return (sptk::kLogZero == x) ? kDefaultF0 : std::exp(x); + }); + break; + } + default: { + break; + } + } + } + + std::vector waveform; + { + std::ifstream ifs; + ifs.open(raw_file, std::ios::in | std::ios::binary); + if (ifs.fail() && NULL != raw_file) { + std::ostringstream error_message; + error_message << "Cannot open file " << raw_file; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + std::istream& input_stream(ifs.fail() ? std::cin : ifs); + + double tmp; + while (sptk::ReadStream(&tmp, &input_stream)) { + waveform.push_back(tmp); + } + } + if (waveform.empty()) return 0; + + sptk::AperiodicityExtraction aperiodicity_extraction( + fft_length, frame_shift, sampling_rate_in_hz, algorithm); + if (!aperiodicity_extraction.IsValid()) { + std::ostringstream error_message; + error_message << "Failed to initialize AperiodicityExtraction"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + + std::vector > aperiodicity; + if (!aperiodicity_extraction.Run(waveform, f0, &aperiodicity)) { + std::ostringstream error_message; + error_message << "Failed to extract aperiodicity"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + + const int output_length(fft_length / 2 + 1); + std::vector output(output_length); + + for (const std::vector& tmp : aperiodicity) { + std::transform(tmp.begin(), tmp.end(), output.begin(), + [lower_bound, upper_bound](double a) { + return std::min(upper_bound, std::max(lower_bound, a)); + }); + + switch (output_format) { + case kAperiodicity: { + // nothing to do + break; + } + case kPeriodicity: { + std::transform(output.begin(), output.end(), output.begin(), + [](double a) { return 1.0 - a; }); + break; + } + case kAperiodicityOverPeriodicity: { + std::transform(output.begin(), output.end(), output.begin(), + [](double a) { return a / (1.0 - a); }); + break; + } + case kPeriodicityOverAperiodicity: { + std::transform(output.begin(), output.end(), output.begin(), + [](double a) { return (1.0 - a) / a; }); + break; + } + default: { + break; + } + } + + if (!sptk::WriteStream(0, output_length, output, &std::cout, NULL)) { + std::ostringstream error_message; + error_message << "Failed to write aperiodicity"; + sptk::PrintErrorMessage("ap", error_message); + return 1; + } + } + + return 0; +} diff --git a/src/main/pitch.cc b/src/main/pitch.cc index ae0fa51..e89152a 100644 --- a/src/main/pitch.cc +++ b/src/main/pitch.cc @@ -74,7 +74,7 @@ void PrintUsage(std::ostream* stream) { *stream << " -t2 t : voicing threshold for REAPER (double)[" << std::setw(5) << std::right << kDefaultVoicingThresholdForReaper << "][ -0.5 <= t <= 1.6 ]" << std::endl; // NOLINT *stream << " -t3 t : voicing threshold for WORLD (double)[" << std::setw(5) << std::right << kDefaultVoicingThresholdForWorld << "][ 0.02 <= t <= 0.2 ]" << std::endl; // NOLINT *stream << " -o o : output format ( int)[" << std::setw(5) << std::right << kDefaultOutputFormat << "][ 0 <= o <= 2 ]" << std::endl; // NOLINT - *stream << " 0 (1/F0)" << std::endl; + *stream << " 0 (Fs/F0)" << std::endl; *stream << " 1 (F0)" << std::endl; *stream << " 2 (log F0)" << std::endl; *stream << " -h : print this message" << std::endl; @@ -188,7 +188,7 @@ int main(int argc, char* argv[]) { } case 'p': { if (!sptk::ConvertStringToInteger(optarg, &frame_shift) || - frame_shift <= 0.0) { + frame_shift <= 0) { std::ostringstream error_message; error_message << "The argument for the -p option must be a positive integer"; diff --git a/test/test_ap.bats b/test/test_ap.bats new file mode 100755 index 0000000..8c4b3fd --- /dev/null +++ b/test/test_ap.bats @@ -0,0 +1,38 @@ +#!/usr/bin/env bats +# ------------------------------------------------------------------------ # +# Copyright 2021 SPTK Working Group # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +# ------------------------------------------------------------------------ # + +sptk3=tools/sptk/bin +sptk4=bin +tmp=test_ap +data=asset/data.short + +setup() { + mkdir -p $tmp +} + +teardown() { + rm -rf $tmp +} + +@test "ap: valgrind" { + $sptk3/x2x +sd $data > $tmp/0 + $sptk4/pitch $tmp/0 > $tmp/1 + for a in $(seq 0 1); do + run valgrind $sptk4/ap -a "$a" -l 12 $tmp/1 $tmp/0 + [ "$(echo "${lines[-1]}" | sed -r 's/.*SUMMARY: ([0-9]*) .*/\1/')" -eq 0 ] + done +} diff --git a/third_party/WORLD/LICENSE b/third_party/WORLD/LICENSE index 598b5a8..1c12e76 100644 --- a/third_party/WORLD/LICENSE +++ b/third_party/WORLD/LICENSE @@ -1,11 +1,11 @@ /* ----------------------------------------------------------------- */ /* WORLD: High-quality speech analysis, */ -/* modification and synthesis system */ +/* manipulation and synthesis system */ /* developed by M. Morise */ -/* http://ml.cs.yamanashi.ac.jp/world/ */ +/* http://www.kisc.meiji.ac.jp/~mmorise/world/english/ */ /* ----------------------------------------------------------------- */ /* */ -/* Copyright (c) 2010-2016 M. Morise */ +/* Copyright (c) 2010 M. Morise */ /* */ /* All rights reserved. */ /* */ diff --git a/third_party/WORLD/aperiodicity.cc b/third_party/WORLD/aperiodicity.cc new file mode 100644 index 0000000..9144104 --- /dev/null +++ b/third_party/WORLD/aperiodicity.cc @@ -0,0 +1,691 @@ +//----------------------------------------------------------------------------- +// Copyright 2012-2015 Masanori Morise. All Rights Reserved. +// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) +// +// Aperiodicity based on TANDEM-STRAIGHT. +// This function would be changed in near future. +//----------------------------------------------------------------------------- +#if 0 +#include "./aperiodicity.h" +#else +#include "world/aperiodicity.h" +#endif + +#include +#include + +#if 0 +#include "./constantnumbers.h" +#include "./matlabfunctions.h" +#else +#include "world/constantnumbers.h" +#include "world/matlabfunctions.h" +#endif + +#if 1 +namespace sptk { +namespace world { +#endif + +const double kNormalCutoff = 600.0; + +// Names of these variables are copied by the source code of Matlab. +// The developer does not know the meaning of these names. +typedef struct { + int segmentLength; + int nMargin; + double **w; + double *wsqrt; + double **H; + double **Hw; + double **R; + double **invR; + + double *Hwx; + double *a; + double *Ha; + double *wxHa; + double *wx; +} InternalParameters; + +namespace { + +//----------------------------------------------------------------------------- +// SetInternalParameters() allocates the memory to the struct. +//----------------------------------------------------------------------------- +void SetInternalParameters(int segment_length, int n_margin, + InternalParameters *internal_parameters) { + internal_parameters->segmentLength = segment_length; + internal_parameters->nMargin = n_margin; + internal_parameters->w = new double *[segment_length]; + for (int i = 0; i < segment_length; ++i) + internal_parameters->w[i] = new double[segment_length]; + internal_parameters->wsqrt = new double[segment_length]; + internal_parameters->H = new double *[segment_length]; + for (int i = 0; i < segment_length; ++i) + internal_parameters->H[i] = new double[n_margin * 2]; + internal_parameters->Hw = new double *[n_margin * 2]; + for (int i = 0; i < n_margin * 2; ++i) + internal_parameters->Hw[i] = new double[segment_length]; + internal_parameters->R = new double *[n_margin * 2]; + for (int i = 0; i < n_margin * 2; ++i) + internal_parameters->R[i] = new double[n_margin * 2]; + internal_parameters->invR = new double *[n_margin * 2]; + for (int i = 0; i < n_margin * 2; ++i) + internal_parameters->invR[i] = new double[n_margin * 2]; + internal_parameters->Hwx = new double[n_margin * 2]; + internal_parameters->a = new double[n_margin * 2]; + internal_parameters->Ha = new double[segment_length]; + internal_parameters->wxHa = new double[segment_length]; + internal_parameters->wx = new double[segment_length]; +} + +//----------------------------------------------------------------------------- +// SetInternalParameters() frees the memory of the struct. +//----------------------------------------------------------------------------- +void DestroyInternalParameters(InternalParameters* internal_parameters) { + delete[] internal_parameters->wsqrt; + delete[] internal_parameters->wx; + delete[] internal_parameters->wxHa; + delete[] internal_parameters->Ha; + delete[] internal_parameters->a; + delete[] internal_parameters->Hwx; + for (int i = 0; i < internal_parameters->nMargin * 2; ++i) + delete[] internal_parameters->invR[i]; + delete[] internal_parameters->invR; + for (int i = 0; i < internal_parameters->nMargin * 2; ++i) + delete[] internal_parameters->R[i]; + delete[] internal_parameters->R; + for (int i = 0; i < internal_parameters->nMargin * 2; ++i) + delete[] internal_parameters->Hw[i]; + delete[] internal_parameters->Hw; + for (int i = 0; i < internal_parameters->segmentLength; ++i) + delete[] internal_parameters->H[i]; + delete[] internal_parameters->H; + for (int i = 0; i < internal_parameters->segmentLength; ++i) + delete[] internal_parameters->w[i]; + delete[] internal_parameters->w; +} + +//----------------------------------------------------------------------------- +// Get*() calculates each parameter. The names do not follow the style guide. +// These names are refered by an article that proposes the method. +// To avoid the confusion, we employed the original names. +//----------------------------------------------------------------------------- +void GetH(double *x, int x_length, int segment_length, int index_bias, + int current_position_in_sample, int t0_in_samples, double **H) { + int index; + for (int i = -1; i < 2; ++i) { + for (int j = 0; j < segment_length; ++j) { +#if 0 + index = MyMax(0, MyMin(x_length - 1, +#else + index = MyMaxInt(0, MyMinInt(x_length - 1, +#endif + i + current_position_in_sample-index_bias - t0_in_samples + j)); + H[j][i + 1] = x[index]; +#if 0 + index = MyMax(0, MyMin(x_length - 1, +#else + index = MyMaxInt(0, MyMinInt(x_length - 1, +#endif + i + current_position_in_sample - index_bias + t0_in_samples + j)); + H[j][i + 4] = x[index]; + } + } +} + +void GetHw(double **H, int segment_length, int n_margin2, double **w, + double **Hw) { + double tmp; + for (int i = 0; i < n_margin2; ++i) { + for (int j = 0; j < segment_length; ++j) { + tmp = 0.0; + for (int k = 0; k < segment_length; ++k) tmp += H[k][i] * w[k][j]; + Hw[i][j] = tmp; + } + } +} + +void GetR(double **Hw, int n_margin2, int segment_length, double **H, + double **R) { + double tmp; + for (int i = 0; i < n_margin2; ++i) { + for (int j = 0; j < n_margin2; ++j) { + tmp = 0.0; + for (int k = 0; k < segment_length; ++k) tmp += Hw[i][k] * H[k][j]; + R[i][j] = tmp; + } + } +} + +void GetHwx(double **Hw, int n_margin2, int segment_length, double *x, + int x_length, int origin, double *Hwx) { + double tmp; + for (int i = 0; i < n_margin2; ++i) { + tmp = 0.0; + for (int j = 0; j < segment_length; ++j) +#if 0 + tmp += Hw[i][j]*x[MyMax(0, MyMin(x_length - 1, origin + j))]; +#else + tmp += Hw[i][j]*x[MyMaxInt(0, MyMinInt(x_length - 1, origin + j))]; +#endif + Hwx[i] = tmp; + } +} + +void Geta(double **invR, int n_margin2, double *Hwx, double *a) { + double tmp; + for (int i = 0; i < n_margin2; ++i) { + tmp = 0.0; + for (int j = 0; j < n_margin2; ++j) tmp += invR[i][j]*Hwx[j]; + a[i] = tmp; + } +} + +void GetHa(double **H, int segment_length, int n_margin2, double *a, + double *Ha) { + double tmp; + for (int i = 0; i < segment_length; ++i) { + tmp = 0.0; + for (int j = 0; j < n_margin2; ++j) tmp += H[i][j]*a[j]; + Ha[i] = tmp; + } +} + +void GetW(int segment_length, double **w) { + for (int i = 0; i < segment_length; ++i) + for (int j = 0; j < segment_length; ++j) w[i][j] = 0.0; + + for (int i = 0; i < (segment_length - 1) / 2; ++i) { + w[i][i] = 0.5 - 0.5 * cos((i + 1.0) / + (segment_length + 1.0) * 2.0 * world::kPi); + w[segment_length - i - 1][segment_length - i - 1] = w[i][i]; + } + w[(segment_length - 1) / 2][(segment_length - 1) / 2] = 1.0; +} + +double GetStdwxHa(double *wsqrt, int segment_length, double *x, int x_length, + int origin, double *Ha, double *wxHa) { + for (int i = 0; i < segment_length; ++i) + wxHa[i] = wsqrt[i] * +#if 0 + (x[MyMax(0, MyMin(x_length - 1, origin + i))] - Ha[i]); +#else + (x[MyMaxInt(0, MyMinInt(x_length - 1, origin + i))] - Ha[i]); +#endif + return matlab_std(wxHa, segment_length); +} + +double GetStdwx(double *wsqrt, int segment_length, double *x, int x_length, + int origin, double *wx) { + for (int i = 0; i < segment_length; ++i) +#if 0 + wx[i] = wsqrt[i] * x[MyMax(0, MyMin(x_length - 1, origin + i))]; +#else + wx[i] = wsqrt[i] * x[MyMaxInt(0, MyMinInt(x_length - 1, origin + i))]; +#endif + return matlab_std(wx, segment_length); +} + +//----------------------------------------------------------------------------- +// f0PredictionResidualFixSegmentW() estimates the aperiodicity in a frequency +// band. +//----------------------------------------------------------------------------- +void f0PredictionResidualFixSegmentW(double *x, int x_length, double fs, + double *f0, double *temporalPositions, int f0_length, double initial_time, + int duration_ms, int current_band, double **aperiodicity) { + const int kNMargin = 3; + int segment_length = matlab_round(fs * duration_ms / 2000.0) * 2 + 1; + + InternalParameters internal_parameters = {0}; + SetInternalParameters(segment_length, kNMargin, &internal_parameters); + + GetW(segment_length, internal_parameters.w); + for (int i = 0; i < segment_length; ++i) + internal_parameters.wsqrt[i] = sqrt(internal_parameters.w[i][i]); + + int t0_in_samples; + int index_bias; + int current_position_in_sample; + int origin; + for (int i = 0; i < f0_length; ++i) { + if (f0[i] != 0.0) { + t0_in_samples = matlab_round(fs / f0[i]); + index_bias = matlab_round(fs / f0[i] / 2.0); + current_position_in_sample = + matlab_round(-initial_time + temporalPositions[i] * fs) + 1; + origin = current_position_in_sample - index_bias; + GetH(x, x_length, segment_length, index_bias, + current_position_in_sample, t0_in_samples, internal_parameters.H); + GetHw(internal_parameters.H, segment_length, kNMargin * 2, + internal_parameters.w, internal_parameters.Hw); + GetR(internal_parameters.Hw, kNMargin * 2, segment_length, + internal_parameters.H, internal_parameters.R); + GetHwx(internal_parameters.Hw, kNMargin * 2, segment_length, + x, x_length, origin, internal_parameters.Hwx); + inv(internal_parameters.R, kNMargin * 2, internal_parameters.invR); + Geta(internal_parameters.invR, kNMargin * 2, internal_parameters.Hwx, + internal_parameters.a); + GetHa(internal_parameters.H, segment_length, kNMargin * 2, + internal_parameters.a, internal_parameters.Ha); + aperiodicity[i][current_band] = GetStdwxHa(internal_parameters.wsqrt, + segment_length, x, x_length, origin, internal_parameters.Ha, + internal_parameters.wxHa) / GetStdwx(internal_parameters.wsqrt, + segment_length, x, x_length, origin, internal_parameters.wx); + } else { // Aperiodicity does not use if the speech is unvoiced. + aperiodicity[i][current_band] = 0.9999999995; // safe guard + } + } + DestroyInternalParameters(&internal_parameters); +} + +//----------------------------------------------------------------------------- +// GetQMFpairOfFilters() sets the coefficients of QM filter (hHP:41, hLP:37) +// Although this function requires fs as the input, the result does not depend +// on it. +//----------------------------------------------------------------------------- +void GetQMFpairOfFilters(int fs, double *hHP, double *hLP) { + hHP[0] = 0.00041447996898231424; + hHP[1] = 0.00078125051417292477; + hHP[2] = -0.0010917236836275842; + hHP[3] = -0.0019867925675967589; + hHP[4] = 0.0020903896961562292; + hHP[5] = 0.0040940570272849346; + hHP[6] = -0.0034025808529816698; + hHP[7] = -0.0074961541272056016; + hHP[8] = 0.0049722633399330637; + hHP[9] = 0.012738791249119802; + hHP[10] = -0.0066960326895749113; + hHP[11] = -0.020694051570247052; + hHP[12] = 0.0084324365650413451; + hHP[13] = 0.033074383758700532; + hHP[14] = -0.010018936738799522; + hHP[15] = -0.054231361405808247; + hHP[16] = 0.011293988915051487; + hHP[17] = 0.10020081367388213; + hHP[18] = -0.012120546202484579; + hHP[19] = -0.31630021039095702; + hHP[20] = 0.51240682580627639; + hHP[21] = -0.31630021039095702; + hHP[22] = -0.012120546202484579; + hHP[23] = 0.10020081367388213; + hHP[24] = 0.011293988915051487; + hHP[25] = -0.054231361405808247; + hHP[26] = -0.010018936738799522; + hHP[27] = 0.033074383758700532; + hHP[28] = 0.0084324365650413451; + hHP[29] = -0.020694051570247052; + hHP[30] = -0.0066960326895749113; + hHP[31] = 0.012738791249119802; + hHP[32] = 0.0049722633399330637; + hHP[33] = -0.0074961541272056016; + hHP[34] = -0.0034025808529816698; + hHP[35] = 0.0040940570272849346; + hHP[36] = 0.0020903896961562292; + hHP[37] = -0.0019867925675967589; + hHP[38] = -0.0010917236836275842; + hHP[39] = 0.00078125051417292477; + hHP[40] = 0.00041447996898231424; + + hLP[0] = -0.00065488170077483048; + hLP[1] = 0.00007561994958159384; + hLP[2] = 0.0020408456937895227; + hLP[3] = -0.00074680535322030437; + hLP[4] = -0.0043502235688264931; + hLP[5] = 0.0025966428382642732; + hLP[6] = 0.0076396022827566962; + hLP[7] = -0.0064904118901497852; + hLP[8] = -0.011765804538954506; + hLP[9] = 0.013649908479276255; + hLP[10] = 0.01636866479016021; + hLP[11] = -0.026075976030529347; + hLP[12] = -0.020910294856659444; + hLP[13] = 0.048260725032316647; + hLP[14] = 0.024767846611048111; + hLP[15] = -0.096178467583360641; + hLP[16] = -0.027359756709866623; + hLP[17] = 0.31488052161630042; + hLP[18] = 0.52827343594055032; + hLP[19] = 0.31488052161630042; + hLP[20] = -0.027359756709866623; + hLP[21] = -0.096178467583360641; + hLP[22] = 0.024767846611048111; + hLP[23] = 0.048260725032316647; + hLP[24] = -0.020910294856659444; + hLP[25] = -0.026075976030529347; + hLP[26] = 0.01636866479016021; + hLP[27] = 0.013649908479276255; + hLP[28] = -0.011765804538954506; + hLP[29] = -0.0064904118901497852; + hLP[30] = 0.0076396022827566962; + hLP[31] = 0.0025966428382642732; + hLP[32] = -0.0043502235688264931; + hLP[33] = -0.00074680535322030437; + hLP[34] = 0.0020408456937895227; + hLP[35] = 0.00007561994958159384; + hLP[36] = -0.00065488170077483048; +} + +//----------------------------------------------------------------------------- +// GetSignalsForAperiodicity() calculates the signals used to calculate the +// aperiodicity. low_signal, high_signal and downsampled_high_signal are +// calculated in this function. +//----------------------------------------------------------------------------- +void GetSignalsForAperiodicity(int fft_size, double *whole_signal, + int filtered_signal_length, double *hHP, double *hLP, + double *low_signal, double *high_signal, double *downsampled_high_signal) { + ForwardRealFFT forward_real_fft = {0}; + InverseRealFFT inverse_real_fft = {0}; + InitializeForwardRealFFT(fft_size, &forward_real_fft); + InitializeInverseRealFFT(fft_size, &inverse_real_fft); + fast_fftfilt(whole_signal, filtered_signal_length, hHP, 41, + fft_size, &forward_real_fft, &inverse_real_fft, high_signal); + fast_fftfilt(whole_signal, filtered_signal_length, hLP, 37, + fft_size, &forward_real_fft, &inverse_real_fft, low_signal); + DestroyForwardRealFFT(&forward_real_fft); + DestroyInverseRealFFT(&inverse_real_fft); + + // Maintain the amplitude + for (int i = 0; i < filtered_signal_length; ++i) { + low_signal[i] *= fft_size; + high_signal[i] *= fft_size; + } + for (int j = 0; j < filtered_signal_length; j += 2) + downsampled_high_signal[j / 2] = high_signal[j]; +} + +//----------------------------------------------------------------------------- +// UpdateWholeSignal() updates the whole_signal. +//----------------------------------------------------------------------------- +inline int UpdateWholeSignal(int filtered_signal_length, int fft_size, + double *low_signal, double *whole_signal) { + for (int i = 0; i < filtered_signal_length; i += 2) + whole_signal[i / 2] = low_signal[i]; + for (int i = matlab_round(filtered_signal_length / 2.0); i < fft_size; i++) + whole_signal[i] = 0.0; + return matlab_round(filtered_signal_length / 2.0) + 82; +} + +//----------------------------------------------------------------------------- +// BandwiseAperiodicity() calculates the aperiodicity in each frequency band. +//----------------------------------------------------------------------------- +void BandwiseAperiodicity(double *x, int x_length, int fs, double *f0, + int f0_length, double *stretched_locations, + int window_length_ms, double **aperiodicity) { + double hHP[41], hLP[37]; + GetQMFpairOfFilters(fs, hHP, hLP); + + int number_of_bands = + static_cast(log(fs / kNormalCutoff) / world::kLog2); + double *cutoff_list = new double[number_of_bands]; + + for (int i = 0; i < number_of_bands; ++i) + cutoff_list[i] = fs / pow(2.0, i + 2.0); + + // 82 = 41 (length of hHP) * 2 + int fft_size = GetSuitableFFTSize(x_length + 82); + + double *whole_signal = new double[fft_size]; + double *high_signal = new double[fft_size]; + double *low_signal = new double[fft_size]; + double *downsampled_high_signal = new double[fft_size]; + + int filtered_signal_length = x_length + 82; + + for (int i = 0; i < x_length; ++i) whole_signal[i] = x[i]; + for (int i = x_length; i < fft_size; ++i) whole_signal[i] = 0.0; + + double tmp_fs = 0.0; + for (int i = 0; i < number_of_bands - 1; ++i) { + tmp_fs = cutoff_list[i] * 2.0; + GetSignalsForAperiodicity(fft_size, whole_signal, filtered_signal_length, + hHP, hLP, low_signal, high_signal, downsampled_high_signal); + + f0PredictionResidualFixSegmentW(downsampled_high_signal, + matlab_round(filtered_signal_length / 2.0), tmp_fs, f0, + stretched_locations, f0_length, 41.0 / 2.0 / tmp_fs, + window_length_ms, number_of_bands - i - 1, aperiodicity); + // subband separation + filtered_signal_length = UpdateWholeSignal(filtered_signal_length, + fft_size, low_signal, whole_signal); + // update the fft size + fft_size = GetSuitableFFTSize(filtered_signal_length); + } + + filtered_signal_length = (filtered_signal_length - 82) * 2; + f0PredictionResidualFixSegmentW(whole_signal, + matlab_round(filtered_signal_length / 2.0), tmp_fs, f0, + stretched_locations, f0_length, 41.0 / 2.0 / tmp_fs, + window_length_ms, 0, aperiodicity); + + delete[] downsampled_high_signal; + delete[] low_signal; + delete[] high_signal; + delete[] whole_signal; + delete[] cutoff_list; +} + +//----------------------------------------------------------------------------- +// GetInterpolatedSignal() carries out the up sampling (target is 4 * fs) +//----------------------------------------------------------------------------- +void GetInterpolatedSignal(double *x, int x_length, double *interpolated_x) { + interpolated_x[0] = x[0] * 0.14644660940672621; + interpolated_x[1] = x[0] * 0.49999999999999994; + interpolated_x[2] = x[0] * 0.85355339059327373; + for (int i = 0; i < x_length - 1; ++i) { + interpolated_x[i * 4 + 3] = x[i]; + interpolated_x[i * 4 + 4] = x[i] * 0.85355339059327373 + + x[i + 1] * 0.14644660940672621; + interpolated_x[i * 4 + 5] = x[i] * 0.49999999999999994 + + x[i + 1] * 0.49999999999999994; + interpolated_x[i * 4 + 6] = x[i] * 0.14644660940672621 + + x[i + 1] * 0.85355339059327373; + } + interpolated_x[(x_length - 1) * 4 + 3] = x[x_length - 1]; + interpolated_x[(x_length - 1) * 4 + 4] = + x[x_length - 1] * 0.85355339059327373; + interpolated_x[(x_length - 1) * 4 + 5] = + x[x_length - 1] * 0.49999999999999994; + interpolated_x[(x_length - 1) * 4 + 6] = + x[x_length - 1] * 0.14644660940672621; + interpolated_x[(x_length - 1) * 4 + 7] = + interpolated_x[(x_length - 1) * 4 + 8] = + interpolated_x[(x_length - 1) * 4 + 9] = 0.0; + return; +} + +//----------------------------------------------------------------------------- +// GetNormalizedSignal() calculates the signal that the f0 contour is constant. +//----------------------------------------------------------------------------- +int GetNormalizedSignal(double *x, int x_length, int fs, double *f0, + int f0_length, double frame_period, double target_f0, + double **stretched_signal, double **stretched_locations) { + int ix_length = x_length * 4 + 6; + + double *interpolated_x = new double[ix_length]; + GetInterpolatedSignal(x, x_length, interpolated_x); + + double *original_signal_time_axis = new double[ix_length]; + + for (int i = 0; i < ix_length; ++i) + original_signal_time_axis[i] = i / (fs * 4.0); + + double *base_f0 = new double[f0_length + 1]; + double *base_time_axis = new double[f0_length + 1]; + + for (int i = 0; i < f0_length; ++i) { + base_f0[i] = f0[i] == 0.0 ? target_f0 : f0[i]; + base_time_axis[i] = i * frame_period; + } + base_f0[f0_length] = base_f0[f0_length - 1] * 2 - base_f0[f0_length - 2]; + base_time_axis[f0_length] = f0_length * frame_period; + + double *interpolated_f0 = new double[ix_length]; + double *stretched_time_axis = new double[ix_length]; + interp1(base_time_axis, base_f0, f0_length + 1, + original_signal_time_axis, ix_length, interpolated_f0); + + double tmp = target_f0 * fs * 4.0; + stretched_time_axis[0] = interpolated_f0[0] / tmp; + for (int i = 1; i < ix_length; ++i) + stretched_time_axis[i] = stretched_time_axis[i - 1] + + (interpolated_f0[i] / tmp); + + int stretched_signal_length = + static_cast(stretched_time_axis[ix_length - 1] * fs * 4.0) + 1; + + double *tmp_time_axis = new double[stretched_signal_length]; + double *stretched_signal4 = new double[stretched_signal_length]; + + for (int i = 0; i < stretched_signal_length; ++i) + tmp_time_axis[i] = i / (fs * 4.0); + interp1(stretched_time_axis, interpolated_x, ix_length, + tmp_time_axis, stretched_signal_length, stretched_signal4); + + *stretched_locations = new double[f0_length]; + interp1(original_signal_time_axis, stretched_time_axis, ix_length, + base_time_axis, f0_length, *stretched_locations); + + // 17 is a safe guard. + *stretched_signal = new double[stretched_signal_length / 4 + 17]; + decimate(stretched_signal4, stretched_signal_length, 4, *stretched_signal); + + delete[] stretched_signal4; + delete[] tmp_time_axis; + delete[] stretched_time_axis; + delete[] base_f0; + delete[] base_time_axis; + delete[] interpolated_f0; + delete[] original_signal_time_axis; + delete[] interpolated_x; + + return 1 + stretched_signal_length / 4; +} + +//----------------------------------------------------------------------------- +// CalculateAperiodicity() transforms the input aperiodicity in each band +// into the aperiodicity spectrum whose length is fft_size / 2 + 1. +//----------------------------------------------------------------------------- +void CalculateAperiodicity(double *coarse_aperiodicity, int number_of_bands, + int fft_size, double stretching_factor, int fs, double *aperiodicity) { + if (stretching_factor == 0) { + for (int i = 0; i <= fft_size / 2; ++i) aperiodicity[i] = 0.0; + return; + } + // 0 Hz and fs / 2 Hz are added for interporation + double *coarse_aperiodicity_expand = new double[number_of_bands + 1]; + double *coarse_axis = new double[number_of_bands + 1]; + double *frequency_axis = new double[fft_size / 2 + 1]; + +#if 0 + const double kMySafeGuardLogMinimum = -21.416413017506358; +#else + const double kMySafeGuardLogMinimum = log(coarse_aperiodicity[0]); +#endif + + coarse_aperiodicity_expand[0] = kMySafeGuardLogMinimum; + coarse_axis[0] = 0.0; + for (int i = 0; i < number_of_bands; ++i) { + coarse_aperiodicity_expand[i + 1] = log(coarse_aperiodicity[i]); + coarse_axis[i + 1] = fs / pow(2.0, number_of_bands - i) * + stretching_factor; + } + + for (int i = 0; i <= fft_size / 2; ++i) + frequency_axis[i] = static_cast(i * fs) / fft_size; + + interp1(coarse_axis, coarse_aperiodicity_expand, number_of_bands + 1, + frequency_axis, fft_size / 2 + 1, aperiodicity); + + for (int i = 0; i <= fft_size / 2; ++i) + aperiodicity[i] = exp(aperiodicity[i]); + + delete[] frequency_axis; + delete[] coarse_axis; + delete[] coarse_aperiodicity_expand; +} + +int GetNumberOfBands(int fs) { + return static_cast(log(fs / kNormalCutoff) / world::kLog2); +} + +} // namespace + +void AperiodicityRatio(double *x, int x_length, int fs, double *f0, + int f0_length, double *time_axis, int fft_size, double **aperiodicity) { + double **original_ap = new double *[f0_length]; + double **normalized_ap = new double *[f0_length]; + for (int i = 0; i < f0_length; ++i) { + original_ap[i] = new double[GetNumberOfBands(fs)]; + normalized_ap[i] = new double[GetNumberOfBands(fs)]; + } + + const double kMinimumF0ForNormalization = 32.0; + const double kMaximumF0ForNormalization = 200.0; + double min_f0 = kMaximumF0ForNormalization; + for (int i = 0; i < f0_length; ++i) + if (f0[i] < min_f0 && f0[i] > kMinimumF0ForNormalization) min_f0 = f0[i]; + +#if 0 + double target_f0 = MyMax(kMinimumF0ForNormalization, + MyMin(kMaximumF0ForNormalization, min_f0)); +#else + double target_f0 = MyMaxDouble(kMinimumF0ForNormalization, + MyMinDouble(kMaximumF0ForNormalization, min_f0)); +#endif + + double *stretched_signal = NULL; + double *stretched_locations = NULL; + int normalized_signal_length = GetNormalizedSignal(x, x_length, fs, f0, + f0_length, time_axis[1], target_f0, &stretched_signal, + &stretched_locations); + + double *stretched_f0 = new double[f0_length]; + for (int i = 0; i < f0_length; ++i) + stretched_f0[i] = f0[i] == 0 ? 0.0 : target_f0; + + BandwiseAperiodicity(stretched_signal, normalized_signal_length, fs, + stretched_f0, f0_length, stretched_locations, + matlab_round(2000.0 / target_f0), normalized_ap); + + BandwiseAperiodicity(x, x_length, fs, f0, f0_length, time_axis, + 30, original_ap); + + double *tmp_ap = new double[fft_size / 2 + 1]; + for (int i = 0; i < f0_length; ++i) { + CalculateAperiodicity(normalized_ap[i], GetNumberOfBands(fs), +#if 0 + fft_size, MyMax(f0[i], target_f0) / target_f0, fs, aperiodicity[i]); +#else + fft_size, MyMaxDouble(f0[i], target_f0) / target_f0, fs, aperiodicity[i]); +#endif + CalculateAperiodicity(original_ap[i], GetNumberOfBands(fs), + fft_size, 1.0, fs, tmp_ap); + for (int j = 0; j < fft_size / 2 + 1; ++j) +#if 0 + aperiodicity[i][j] = MyMin(0.9999999995, + MyMax(0.0000000005, MyMin(aperiodicity[i][j], tmp_ap[j]))); +#else + aperiodicity[i][j] = MyMinDouble(aperiodicity[i][j], tmp_ap[j]); +#endif + } + + delete[] tmp_ap; + delete[] stretched_f0; + delete[] stretched_signal; + delete[] stretched_locations; + for (int i = 0; i < f0_length; ++i) { + delete[] original_ap[i]; + delete[] normalized_ap[i]; + } + delete[] original_ap; + delete[] normalized_ap; + + return; +} + +#if 1 +} // namespace world +} // namespace sptk +#endif diff --git a/third_party/WORLD/common.cc b/third_party/WORLD/common.cc index 341e1ae..c648c6c 100644 --- a/third_party/WORLD/common.cc +++ b/third_party/WORLD/common.cc @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/04/29 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 // // common.cpp includes functions used in at least two files. // (1) Common functions diff --git a/third_party/WORLD/d4c.cc b/third_party/WORLD/d4c.cc new file mode 100644 index 0000000..944d7de --- /dev/null +++ b/third_party/WORLD/d4c.cc @@ -0,0 +1,411 @@ +//----------------------------------------------------------------------------- +// Copyright 2012 Masanori Morise +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 +// +// Band-aperiodicity estimation on the basis of the idea of D4C. +//----------------------------------------------------------------------------- +#include "world/d4c.h" + +#include +#include // for std::sort() + +#include "world/common.h" +#include "world/constantnumbers.h" +#include "world/matlabfunctions.h" + +#if 1 +namespace sptk { +namespace world { +#endif + +namespace { +//----------------------------------------------------------------------------- +// SetParametersForGetWindowedWaveform() +//----------------------------------------------------------------------------- +static void SetParametersForGetWindowedWaveform(int half_window_length, + int x_length, double current_position, int fs, double current_f0, + int window_type, double window_length_ratio, int *base_index, + int *safe_index, double *window) { + for (int i = -half_window_length; i <= half_window_length; ++i) + base_index[i + half_window_length] = i; + int origin = matlab_round(current_position * fs + 0.001); + for (int i = 0; i <= half_window_length * 2; ++i) + safe_index[i] = + MyMinInt(x_length - 1, MyMaxInt(0, origin + base_index[i])); + + // Designing of the window function + double position; + if (window_type == world::kHanning) { // Hanning window + for (int i = 0; i <= half_window_length * 2; ++i) { + position = (2.0 * base_index[i] / window_length_ratio) / fs; + window[i] = 0.5 * cos(world::kPi * position * current_f0) + 0.5; + } + } else { // Blackman window + for (int i = 0; i <= half_window_length * 2; ++i) { + position = (2.0 * base_index[i] / window_length_ratio) / fs; + window[i] = 0.42 + 0.5 * cos(world::kPi * position * current_f0) + + 0.08 * cos(world::kPi * position * current_f0 * 2); + } + } +} + +//----------------------------------------------------------------------------- +// GetWindowedWaveform() windows the waveform by F0-adaptive window +// In the variable window_type, 1: hanning, 2: blackman +//----------------------------------------------------------------------------- +static void GetWindowedWaveform(const double *x, int x_length, int fs, + double current_f0, double current_position, int window_type, + double window_length_ratio, double *waveform) { + int half_window_length = + matlab_round(window_length_ratio * fs / current_f0 / 2.0); + + int *base_index = new int[half_window_length * 2 + 1]; + int *safe_index = new int[half_window_length * 2 + 1]; + double *window = new double[half_window_length * 2 + 1]; + + SetParametersForGetWindowedWaveform(half_window_length, x_length, + current_position, fs, current_f0, window_type, window_length_ratio, + base_index, safe_index, window); + + // F0-adaptive windowing + for (int i = 0; i <= half_window_length * 2; ++i) + waveform[i] = + x[safe_index[i]] * window[i] + randn() * world::kMySafeGuardMinimum; + + double tmp_weight1 = 0; + double tmp_weight2 = 0; + for (int i = 0; i <= half_window_length * 2; ++i) { + tmp_weight1 += waveform[i]; + tmp_weight2 += window[i]; + } + double weighting_coefficient = tmp_weight1 / tmp_weight2; + for (int i = 0; i <= half_window_length * 2; ++i) + waveform[i] -= window[i] * weighting_coefficient; + + delete[] base_index; + delete[] safe_index; + delete[] window; +} + +//----------------------------------------------------------------------------- +// GetCentroid() calculates the energy centroid (see the book, time-frequency +// analysis written by L. Cohen). +//----------------------------------------------------------------------------- +static void GetCentroid(const double *x, int x_length, int fs, + double current_f0, int fft_size, double current_position, + const ForwardRealFFT *forward_real_fft, double *centroid) { + for (int i = 0; i < fft_size; ++i) forward_real_fft->waveform[i] = 0.0; + GetWindowedWaveform(x, x_length, fs, current_f0, + current_position, world::kBlackman, 4.0, forward_real_fft->waveform); + double power = 0.0; + for (int i = 0; i <= matlab_round(2.0 * fs / current_f0) * 2; ++i) + power += forward_real_fft->waveform[i] * forward_real_fft->waveform[i]; + for (int i = 0; i <= matlab_round(2.0 * fs / current_f0) * 2; ++i) + forward_real_fft->waveform[i] /= sqrt(power); + + fft_execute(forward_real_fft->forward_fft); + double *tmp_real = new double[fft_size / 2 + 1]; + double *tmp_imag = new double[fft_size / 2 + 1]; + for (int i = 0; i <= fft_size / 2; ++i) { + tmp_real[i] = forward_real_fft->spectrum[i][0]; + tmp_imag[i] = forward_real_fft->spectrum[i][1]; + } + + for (int i = 0; i < fft_size; ++i) + forward_real_fft->waveform[i] *= i + 1.0; + fft_execute(forward_real_fft->forward_fft); + for (int i = 0; i <= fft_size / 2; ++i) + centroid[i] = forward_real_fft->spectrum[i][0] * tmp_real[i] + + tmp_imag[i] * forward_real_fft->spectrum[i][1]; + + delete[] tmp_real; + delete[] tmp_imag; +} + +//----------------------------------------------------------------------------- +// GetStaticCentroid() calculates the temporally static energy centroid. +// Basic idea was proposed by H. Kawahara. +//----------------------------------------------------------------------------- +static void GetStaticCentroid(const double *x, int x_length, int fs, + double current_f0, int fft_size, double current_position, + const ForwardRealFFT *forward_real_fft, double *static_centroid) { + double *centroid1 = new double[fft_size / 2 + 1]; + double *centroid2 = new double[fft_size / 2 + 1]; + + GetCentroid(x, x_length, fs, current_f0, fft_size, + current_position - 0.25 / current_f0, forward_real_fft, centroid1); + GetCentroid(x, x_length, fs, current_f0, fft_size, + current_position + 0.25 / current_f0, forward_real_fft, centroid2); + + for (int i = 0; i <= fft_size / 2; ++i) + static_centroid[i] = centroid1[i] + centroid2[i]; + + DCCorrection(static_centroid, current_f0, fs, fft_size, static_centroid); + delete[] centroid1; + delete[] centroid2; +} + +//----------------------------------------------------------------------------- +// GetSmoothedPowerSpectrum() calculates the smoothed power spectrum. +// The parameters used for smoothing are optimized in davance. +//----------------------------------------------------------------------------- +static void GetSmoothedPowerSpectrum(const double *x, int x_length, int fs, + double current_f0, int fft_size, double current_position, + const ForwardRealFFT *forward_real_fft, double *smoothed_power_spectrum) { + for (int i = 0; i < fft_size; ++i) forward_real_fft->waveform[i] = 0.0; + GetWindowedWaveform(x, x_length, fs, current_f0, + current_position, world::kHanning, 4.0, forward_real_fft->waveform); + + fft_execute(forward_real_fft->forward_fft); + for (int i = 0; i <= fft_size / 2; ++i) + smoothed_power_spectrum[i] = + forward_real_fft->spectrum[i][0] * forward_real_fft->spectrum[i][0] + + forward_real_fft->spectrum[i][1] * forward_real_fft->spectrum[i][1]; + DCCorrection(smoothed_power_spectrum, current_f0, fs, fft_size, + smoothed_power_spectrum); + LinearSmoothing(smoothed_power_spectrum, current_f0, fs, fft_size, + smoothed_power_spectrum); +} + +//----------------------------------------------------------------------------- +// GetStaticGroupDelay() calculates the temporally static group delay. +// This is the fundamental parameter in D4C. +//----------------------------------------------------------------------------- +static void GetStaticGroupDelay(const double *static_centroid, + const double *smoothed_power_spectrum, int fs, double f0, + int fft_size, double *static_group_delay) { + for (int i = 0; i <= fft_size / 2; ++i) + static_group_delay[i] = static_centroid[i] / smoothed_power_spectrum[i]; + LinearSmoothing(static_group_delay, f0 / 2.0, fs, fft_size, + static_group_delay); + + double *smoothed_group_delay = new double[fft_size / 2 + 1]; + LinearSmoothing(static_group_delay, f0, fs, fft_size, + smoothed_group_delay); + + for (int i = 0; i <= fft_size / 2; ++i) + static_group_delay[i] -= smoothed_group_delay[i]; + + delete[] smoothed_group_delay; +} + +//----------------------------------------------------------------------------- +// GetCoarseAperiodicity() calculates the aperiodicity in multiples of 3 kHz. +// The upper limit is given based on the sampling frequency. +//----------------------------------------------------------------------------- +static void GetCoarseAperiodicity(const double *static_group_delay, int fs, + int fft_size, int number_of_aperiodicities, const double *window, + int window_length, const ForwardRealFFT *forward_real_fft, + double *coarse_aperiodicity) { + int boundary = + matlab_round(fft_size * 8.0 / window_length); + int half_window_length = window_length / 2; + + for (int i = 0; i < fft_size; ++i) forward_real_fft->waveform[i] = 0.0; + + double *power_spectrum = new double[fft_size / 2 + 1]; + int center; + for (int i = 0; i < number_of_aperiodicities; ++i) { + center = + static_cast(world::kFrequencyInterval * (i + 1) * fft_size / fs); + for (int j = 0; j <= half_window_length * 2; ++j) + forward_real_fft->waveform[j] = + static_group_delay[center - half_window_length + j] * window[j]; + fft_execute(forward_real_fft->forward_fft); + for (int j = 0 ; j <= fft_size / 2; ++j) + power_spectrum[j] = + forward_real_fft->spectrum[j][0] * forward_real_fft->spectrum[j][0] + + forward_real_fft->spectrum[j][1] * forward_real_fft->spectrum[j][1]; + std::sort(power_spectrum, power_spectrum + fft_size / 2 + 1); + for (int j = 1 ; j <= fft_size / 2; ++j) + power_spectrum[j] += power_spectrum[j - 1]; + coarse_aperiodicity[i] = + 10 * log10(power_spectrum[fft_size / 2 - boundary - 1] / + power_spectrum[fft_size / 2]); + } + delete[] power_spectrum; +} + +static double D4CLoveTrainSub(const double *x, int fs, int x_length, + double current_f0, double current_position, int f0_length, int fft_size, + int boundary0, int boundary1, int boundary2, + ForwardRealFFT *forward_real_fft) { + double *power_spectrum = new double[fft_size]; + + int window_length = matlab_round(1.5 * fs / current_f0) * 2 + 1; + GetWindowedWaveform(x, x_length, fs, current_f0, current_position, + world::kBlackman, 3.0, forward_real_fft->waveform); + + for (int i = window_length; i < fft_size; ++i) + forward_real_fft->waveform[i] = 0.0; + fft_execute(forward_real_fft->forward_fft); + + for (int i = 0; i <= boundary0; ++i) power_spectrum[i] = 0.0; + for (int i = boundary0 + 1; i < fft_size / 2 + 1; ++i) + power_spectrum[i] = + forward_real_fft->spectrum[i][0] * forward_real_fft->spectrum[i][0] + + forward_real_fft->spectrum[i][1] * forward_real_fft->spectrum[i][1]; + for (int i = boundary0; i <= boundary2; ++i) + power_spectrum[i] += +power_spectrum[i - 1]; + + double aperiodicity0 = power_spectrum[boundary1] / power_spectrum[boundary2]; + delete[] power_spectrum; + return aperiodicity0; +} + +//----------------------------------------------------------------------------- +// D4CLoveTrain() determines the aperiodicity with VUV detection. +// If a frame was determined as the unvoiced section, aperiodicity is set to +// very high value as the safeguard. +// If it was voiced section, the aperiodicity of 0 Hz is set to -60 dB. +//----------------------------------------------------------------------------- +static void D4CLoveTrain(const double *x, int fs, int x_length, + const double *f0, int f0_length, const double *temporal_positions, + double *aperiodicity0) { + double lowest_f0 = 40.0; + int fft_size = static_cast(pow(2.0, 1.0 + + static_cast(log(3.0 * fs / lowest_f0 + 1) / world::kLog2))); + ForwardRealFFT forward_real_fft = { 0 }; + InitializeForwardRealFFT(fft_size, &forward_real_fft); + + // Cumulative powers at 100, 4000, 7900 Hz are used for VUV identification. + int boundary0 = static_cast(ceil(100.0 * fft_size / fs)); + int boundary1 = static_cast(ceil(4000.0 * fft_size / fs)); + int boundary2 = static_cast(ceil(7900.0 * fft_size / fs)); + for (int i = 0; i < f0_length; ++i) { + if (f0[i] == 0.0) { + aperiodicity0[i] = 0.0; + continue; + } + aperiodicity0[i] = D4CLoveTrainSub(x, fs, x_length, + MyMaxDouble(f0[i], lowest_f0), temporal_positions[i], f0_length, + fft_size, boundary0, boundary1, boundary2, &forward_real_fft); + } + + DestroyForwardRealFFT(&forward_real_fft); +} + +//----------------------------------------------------------------------------- +// D4CGeneralBody() calculates a spectral envelope at a temporal +// position. This function is only used in D4C(). +// Caution: +// forward_fft is allocated in advance to speed up the processing. +//----------------------------------------------------------------------------- +static void D4CGeneralBody(const double *x, int x_length, int fs, + double current_f0, int fft_size, double current_position, + int number_of_aperiodicities, const double *window, int window_length, + const ForwardRealFFT *forward_real_fft, double *coarse_aperiodicity) { + double *static_centroid = new double[fft_size / 2 + 1]; + double *smoothed_power_spectrum = new double[fft_size / 2 + 1]; + double *static_group_delay = new double[fft_size / 2 + 1]; + GetStaticCentroid(x, x_length, fs, current_f0, fft_size, current_position, + forward_real_fft, static_centroid); + GetSmoothedPowerSpectrum(x, x_length, fs, current_f0, fft_size, + current_position, forward_real_fft, smoothed_power_spectrum); + GetStaticGroupDelay(static_centroid, smoothed_power_spectrum, + fs, current_f0, fft_size, static_group_delay); + + GetCoarseAperiodicity(static_group_delay, fs, fft_size, + number_of_aperiodicities, window, window_length, forward_real_fft, + coarse_aperiodicity); + + // Revision of the result based on the F0 + for (int i = 0; i < number_of_aperiodicities; ++i) + coarse_aperiodicity[i] = MyMinDouble(0.0, + coarse_aperiodicity[i] + (current_f0 - 100) / 50.0); + + delete[] static_centroid; + delete[] smoothed_power_spectrum; + delete[] static_group_delay; +} + +static void InitializeAperiodicity(int f0_length, int fft_size, + double **aperiodicity) { + for (int i = 0; i < f0_length; ++i) + for (int j = 0; j < fft_size / 2 + 1; ++j) + aperiodicity[i][j] = 1.0 - world::kMySafeGuardMinimum; +} + +static void GetAperiodicity(const double *coarse_frequency_axis, + const double *coarse_aperiodicity, int number_of_aperiodicities, + const double *frequency_axis, int fft_size, double *aperiodicity) { + interp1(coarse_frequency_axis, coarse_aperiodicity, + number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1, + aperiodicity); + for (int i = 0; i <= fft_size / 2; ++i) + aperiodicity[i] = pow(10.0, aperiodicity[i] / 20.0); +} + +} // namespace + +void D4C(const double *x, int x_length, int fs, + const double *temporal_positions, const double *f0, int f0_length, + int fft_size, const D4COption *option, double **aperiodicity) { + randn_reseed(); + + InitializeAperiodicity(f0_length, fft_size, aperiodicity); + + int fft_size_d4c = static_cast(pow(2.0, 1.0 + + static_cast(log(4.0 * fs / world::kFloorF0D4C + 1) / + world::kLog2))); + + ForwardRealFFT forward_real_fft = {0}; + InitializeForwardRealFFT(fft_size_d4c, &forward_real_fft); + + int number_of_aperiodicities = + static_cast(MyMinDouble(world::kUpperLimit, fs / 2.0 - + world::kFrequencyInterval) / world::kFrequencyInterval); + // Since the window function is common in D4CGeneralBody(), + // it is designed here to speed up. + int window_length = + static_cast(world::kFrequencyInterval * fft_size_d4c / fs) * 2 + 1; + double *window = new double[window_length]; + NuttallWindow(window_length, window); + + // D4C Love Train (Aperiodicity of 0 Hz is given by the different algorithm) + double *aperiodicity0 = new double[f0_length]; + D4CLoveTrain(x, fs, x_length, f0, f0_length, temporal_positions, + aperiodicity0); + + double *coarse_aperiodicity = new double[number_of_aperiodicities + 2]; + coarse_aperiodicity[0] = -60.0; + coarse_aperiodicity[number_of_aperiodicities + 1] = + -world::kMySafeGuardMinimum; + double *coarse_frequency_axis = new double[number_of_aperiodicities + 2]; + for (int i = 0; i <= number_of_aperiodicities; ++i) + coarse_frequency_axis[i] = i * world::kFrequencyInterval; + coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0; + + double *frequency_axis = new double[fft_size / 2 + 1]; + for (int i = 0; i <= fft_size / 2; ++i) + frequency_axis[i] = static_cast(i) * fs / fft_size; + + for (int i = 0; i < f0_length; ++i) { + if (f0[i] == 0 || aperiodicity0[i] <= option->threshold) continue; + D4CGeneralBody(x, x_length, fs, MyMaxDouble(world::kFloorF0D4C, f0[i]), + fft_size_d4c, temporal_positions[i], number_of_aperiodicities, window, + window_length, &forward_real_fft, &coarse_aperiodicity[1]); + + // Linear interpolation to convert the coarse aperiodicity into its + // spectral representation. + GetAperiodicity(coarse_frequency_axis, coarse_aperiodicity, + number_of_aperiodicities, frequency_axis, fft_size, aperiodicity[i]); + } + + DestroyForwardRealFFT(&forward_real_fft); + delete[] aperiodicity0; + delete[] coarse_frequency_axis; + delete[] coarse_aperiodicity; + delete[] window; + delete[] frequency_axis; +} + +void InitializeD4COption(D4COption *option) { + option->threshold = world::kThreshold; +} + +#if 1 +} // namespace world +} // namespace sptk +#endif diff --git a/third_party/WORLD/dio.cc b/third_party/WORLD/dio.cc index d5c3886..e8882a9 100644 --- a/third_party/WORLD/dio.cc +++ b/third_party/WORLD/dio.cc @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/03/04 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 // // F0 estimation based on DIO (Distributed Inline-filter Operation). //----------------------------------------------------------------------------- @@ -90,7 +90,7 @@ static void GetSpectrumForEstimation(const double *x, int x_length, fft_plan_dft_r2c_1d(fft_size, y, y_spectrum, FFT_ESTIMATE); fft_execute(forwardFFT); - // Low cut filtering (from 0.1.4). Cut off frequency is 50. + // Low cut filtering (from 0.1.4). Cut off frequency is 50.0 Hz. int cutoff_in_sample = matlab_round(actual_fs / world::kCutOff); DesignLowCutFilter(cutoff_in_sample * 2 + 1, fft_size, y); @@ -618,6 +618,7 @@ static void DioGeneralBody(const double *x, int x_length, int fs, double actual_fs = static_cast(fs) / decimation_ratio; #if 0 int fft_size = GetSuitableFFTSize(y_length + + matlab_round(actual_fs / world::kCutOff) * 2 + 1 + (4 * static_cast(1.0 + actual_fs / boundary_f0_list[0] / 2.0))); #else int fft_size = GetSuitableFFTSize(y_length + 5 + diff --git a/third_party/WORLD/fft_world.cc b/third_party/WORLD/fft_world.cc index 08136ba..0eb3037 100644 --- a/third_party/WORLD/fft_world.cc +++ b/third_party/WORLD/fft_world.cc @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2018/01/21 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 // // This file represents the functions about FFT (Fast Fourier Transform) // implemented by Mr. Ooura, and wrapper functions implemented by M. Morise. @@ -14,7 +14,11 @@ // (English) http://www.fftw.org/ // 2012/08/24 by M. Morise //----------------------------------------------------------------------------- +#if 0 +#include "world/fft.h" +#else #include "world/fft_world.h" +#endif #include #include diff --git a/third_party/WORLD/matlabfunctions.cc b/third_party/WORLD/matlabfunctions.cc index 89bdce1..8aa0aac 100644 --- a/third_party/WORLD/matlabfunctions.cc +++ b/third_party/WORLD/matlabfunctions.cc @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/02/01 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 // // Matlab functions implemented for WORLD // Since these functions are implemented as the same function of Matlab, @@ -162,27 +162,21 @@ void histc(const double *x, int x_length, const double *edges, void interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) { double *h = new double[x_length - 1]; - double *p = new double[xi_length]; - double *s = new double[xi_length]; int *k = new int[xi_length]; for (int i = 0; i < x_length - 1; ++i) h[i] = x[i + 1] - x[i]; for (int i = 0; i < xi_length; ++i) { - p[i] = i; k[i] = 0; } histc(x, x_length, xi, xi_length, k); - for (int i = 0; i < xi_length; ++i) - s[i] = (xi[i] - x[k[i] - 1]) / h[k[i] - 1]; - - for (int i = 0; i < xi_length; ++i) - yi[i] = y[k[i] - 1] + s[i] * (y[k[i]] - y[k[i] - 1]); + for (int i = 0; i < xi_length; ++i) { + double s = (xi[i] - x[k[i] - 1]) / h[k[i] - 1]; + yi[i] = y[k[i] - 1] + s * (y[k[i]] - y[k[i] - 1]); + } delete[] k; - delete[] s; - delete[] p; delete[] h; } @@ -203,7 +197,7 @@ void decimate(const double *x, int x_length, int r, double *y) { for (int i = 0; i < 2 * kNFact + x_length; ++i) tmp1[i] = tmp2[2 * kNFact + x_length - i - 1]; - int nout = x_length / r + 1; + int nout = (x_length - 1) / r + 1; int nbeg = r - r * nout + x_length; int count = 0; @@ -318,6 +312,34 @@ void fast_fftfilt(const double *x, int x_length, const double *h, int h_length, delete[] x_spectrum; } +#if 1 +void inv(double **r, int n, double **invr) { + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) invr[i][j] = 0.0; + for (int i = 0; i < n; ++i) invr[i][i] = 1.0; + + double tmp; + for (int i = 0; i < n; ++i) { + tmp = r[i][i]; + r[i][i] = 1.0; + for (int j = 0; j <= i; ++j) invr[i][j] /= tmp; + for (int j = i + 1; j < n; ++j) r[i][j] /= tmp; + for (int j = i + 1; j < n; ++j) { + tmp = r[j][i]; + for (int k = 0; k <= i; ++k) invr[j][k] -= invr[i][k] * tmp; + for (int k = i; k < n; ++k) r[j][k] -= r[i][k] * tmp; + } + } + + for (int i = n - 1; i >= 0; --i) { + for (int j = 0; j < i; ++j) { + tmp = r[j][i]; + for (int k = 0; k < n; ++k) invr[j][k] -= invr[i][k] * tmp; + } + } +} +#endif + double matlab_std(const double *x, int x_length) { double average = 0.0; for (int i = 0; i < x_length; ++i) average += x[i]; diff --git a/third_party/WORLD/world/aperiodicity.h b/third_party/WORLD/world/aperiodicity.h new file mode 100644 index 0000000..491d9b3 --- /dev/null +++ b/third_party/WORLD/world/aperiodicity.h @@ -0,0 +1,35 @@ +//----------------------------------------------------------------------------- +// Copyright 2012-2015 Masanori Morise. All Rights Reserved. +// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) +//----------------------------------------------------------------------------- +#ifndef WORLD_APERIODICITY_H_ +#define WORLD_APERIODICITY_H_ + +#if 1 +namespace sptk { +namespace world { +#endif + +//----------------------------------------------------------------------------- +// The latest version of aperiodicity estimation in TANDEM-STRAIGHT. +// This function skipped several complex processes. +// Input: +// x : Input signal +// x_length : Length of x +// f0 : f0 contour +// f0_length : Length of f0 +// frame_period : Time interval for analysis +// Output: +// aperiodicity : Estimated aperiodicity +// Value used for the aperiodicity estimation. This value is used for +// the synthesis. +//----------------------------------------------------------------------------- +void AperiodicityRatio(double *x, int x_length, int fs, double *f0, + int f0_length, double *time_axis, int fft_size, double **aperiodicity); + +#if 1 +} // namespace world +} // namespace sptk +#endif + +#endif // WORLD_APERIODICITY_H_ diff --git a/third_party/WORLD/world/common.h b/third_party/WORLD/world/common.h index cd9ed95..a451068 100644 --- a/third_party/WORLD/world/common.h +++ b/third_party/WORLD/world/common.h @@ -1,12 +1,16 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/04/29 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 //----------------------------------------------------------------------------- #ifndef WORLD_COMMON_H_ #define WORLD_COMMON_H_ +#if 0 +#include "world/fft.h" +#else #include "world/fft_world.h" +#endif #include "world/macrodefinitions.h" #if 1 diff --git a/third_party/WORLD/world/constantnumbers.h b/third_party/WORLD/world/constantnumbers.h index e87c226..5b5a142 100644 --- a/third_party/WORLD/world/constantnumbers.h +++ b/third_party/WORLD/world/constantnumbers.h @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/04/29 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 // // This header file only defines constant numbers used for several function. //----------------------------------------------------------------------------- diff --git a/third_party/WORLD/world/d4c.h b/third_party/WORLD/world/d4c.h new file mode 100644 index 0000000..6fd8876 --- /dev/null +++ b/third_party/WORLD/world/d4c.h @@ -0,0 +1,60 @@ +//----------------------------------------------------------------------------- +// Copyright 2012 Masanori Morise +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 +//----------------------------------------------------------------------------- +#ifndef WORLD_D4C_H_ +#define WORLD_D4C_H_ + +#if 0 +#include "world/macrodefinitions.h" + +WORLD_BEGIN_C_DECLS +#else +namespace sptk { +namespace world { +#endif + +//----------------------------------------------------------------------------- +// Struct for D4C +//----------------------------------------------------------------------------- +typedef struct { + double threshold; +} D4COption; + +//----------------------------------------------------------------------------- +// D4C() calculates the aperiodicity estimated by D4C. +// +// Input: +// x : Input signal +// x_length : Length of x +// fs : Sampling frequency +// temporal_positions : Time axis +// f0 : F0 contour +// f0_length : Length of F0 contour +// fft_size : Number of samples of the aperiodicity in one frame. +// : It is given by the equation fft_size / 2 + 1. +// Output: +// aperiodicity : Aperiodicity estimated by D4C. +//----------------------------------------------------------------------------- +void D4C(const double *x, int x_length, int fs, + const double *temporal_positions, const double *f0, int f0_length, + int fft_size, const D4COption *option, double **aperiodicity); + +//----------------------------------------------------------------------------- +// InitializeD4COption allocates the memory to the struct and sets the +// default parameters. +// +// Output: +// option : Struct for the optional parameter. +//----------------------------------------------------------------------------- +void InitializeD4COption(D4COption *option); + +#if 0 +WORLD_END_C_DECLS +#else +} // namespace world +} // namespace sptk +#endif + +#endif // WORLD_D4C_H_ diff --git a/third_party/WORLD/world/dio.h b/third_party/WORLD/world/dio.h index 5b59aff..4be1e20 100644 --- a/third_party/WORLD/world/dio.h +++ b/third_party/WORLD/world/dio.h @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/02/01 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 //----------------------------------------------------------------------------- #ifndef WORLD_DIO_H_ #define WORLD_DIO_H_ diff --git a/third_party/WORLD/world/fft_world.h b/third_party/WORLD/world/fft_world.h index ba436d4..9c5536d 100644 --- a/third_party/WORLD/world/fft_world.h +++ b/third_party/WORLD/world/fft_world.h @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/02/01 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 // // These functions and variables are defined to use FFT as well as FFTW // Please see fft.cpp to show the detailed information diff --git a/third_party/WORLD/world/matlabfunctions.h b/third_party/WORLD/world/matlabfunctions.h index b546d2a..814af68 100644 --- a/third_party/WORLD/world/matlabfunctions.h +++ b/third_party/WORLD/world/matlabfunctions.h @@ -1,7 +1,7 @@ //----------------------------------------------------------------------------- // Copyright 2012 Masanori Morise -// Author: mmorise [at] yamanashi.ac.jp (Masanori Morise) -// Last update: 2017/02/01 +// Author: mmorise [at] meiji.ac.jp (Masanori Morise) +// Last update: 2021/02/15 //----------------------------------------------------------------------------- #ifndef WORLD_MATLABFUNCTIONS_H_ #define WORLD_MATLABFUNCTIONS_H_ @@ -163,6 +163,20 @@ void fast_fftfilt(const double *x, int x_length, const double *h, int h_length, int fft_size, const ForwardRealFFT *forward_real_fft, const InverseRealFFT *inverse_real_fft, double *y); +#if 1 +//----------------------------------------------------------------------------- +// inv() calculates the inverse matrix of input square matrix. +// +// Input: +// r : Input square matrix; +// n : Number of dimensions of the input +// +// Output: +// invr : Calculated inverse matrix. +//----------------------------------------------------------------------------- +void inv(double **r, int n, double **invr); +#endif + //----------------------------------------------------------------------------- // matlab_std() calculates the standard deviation of the input vector. //