Skip to content

Commit

Permalink
Merge pull request #36 from sp-nitech/harvest
Browse files Browse the repository at this point in the history
Support harvest
  • Loading branch information
takenori-y committed May 28, 2023
2 parents b0530ab + 2e9de46 commit d915c6c
Show file tree
Hide file tree
Showing 12 changed files with 1,671 additions and 45 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ set(CC_SOURCES
${THIRD_PARTY_DIR}/WORLD/d4c.cc
${THIRD_PARTY_DIR}/WORLD/dio.cc
${THIRD_PARTY_DIR}/WORLD/fft_world.cc
${THIRD_PARTY_DIR}/WORLD/harvest.cc
${THIRD_PARTY_DIR}/WORLD/matlabfunctions.cc
${THIRD_PARTY_DIR}/WORLD/stonemask.cc
${THIRD_PARTY_DIR}/WORLD/synthesis.cc
Expand All @@ -80,10 +81,11 @@ set(CC_SOURCES
${SOURCE_DIR}/analysis/mel_frequency_cepstral_coefficients_analysis.cc
${SOURCE_DIR}/analysis/mel_generalized_cepstral_analysis.cc
${SOURCE_DIR}/analysis/pitch_extraction.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_dio.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_harvest.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_rapt.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_reaper.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_swipe.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_world.cc
${SOURCE_DIR}/analysis/second_order_all_pass_mel_cepstral_analysis.cc
${SOURCE_DIR}/analysis/spectrum_extraction.cc
${SOURCE_DIR}/analysis/spectrum_extraction_by_world.cc
Expand Down
8 changes: 5 additions & 3 deletions egs/pitch_voting/lf0/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fp=$((sr * 5)) # Frame shift (16kHz x 5ms)
mkdir -p $dump

# Extract pitch.
for a in $(seq 0 3); do
for a in $(seq 0 4); do
$sptk4/x2x +sd $data |
$sptk4/pitch -s $sr -p $fp -o 2 -a "$a" > $dump/data.lf0."$a"
done
Expand All @@ -36,9 +36,11 @@ done
$sptk4/merge -l 1 -L 1 $dump/data.lf0.1 < $dump/data.lf0.0 |
$sptk4/merge -l 2 -L 1 $dump/data.lf0.2 |
$sptk4/merge -l 3 -L 1 $dump/data.lf0.3 |
$sptk4/medfilt -l 4 -k 2 -magic -1e+10 -w 1 > $dump/data.lf0
$sptk4/merge -l 4 -L 1 $dump/data.lf0.4 |
$sptk4/medfilt -l 5 -k 2 -magic -1e+10 -w 1 > $dump/data.lf0

# Draw pitch contours.
export VIRTUAL_ENV_DISABLE_PROMPT=1
# shellcheck disable=SC1091
. ../../../tools/venv/bin/activate
n=$($sptk4/x2x +da $dump/data.lf0 | wc -l)
Expand All @@ -47,6 +49,6 @@ cat $dump/data.lf0.? $dump/data.lf0 |
$sptk4/fdrw -n "$n" -g $dump/contour.png \
-xname "Time [frame]" \
-yname "Fundamental frequency [Hz]" \
-names "RAPT,SWIPE,REAPER,WORLD,Voting"
-names "RAPT,SWIPE,REAPER,DIO,Harvest,Voting"

echo "run.sh: successfully finished"
15 changes: 13 additions & 2 deletions include/SPTK/analysis/pitch_extraction.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace sptk {
*
* The input is whole audio waveform and the output is the sequence of the
* fundamental frequency. The implemented algorithms of the extraction are
* RAPT, SWIPE, REAPER, and DIO.
* RAPT, SWIPE, REAPER, DIO, and Harvest.
*
* [1] D. Talkin, &quot;A robust algorithm for pitch tracking,&quot; Speech
* Coding and Synthesis, pp. 497-518, 1995.
Expand All @@ -44,13 +44,24 @@ namespace sptk {
* estimation method based on the period extraction of vocal fold vibration
* of singing voice and speech,&quot; Proc. of AES 35th International
* Conference, 2009.
*
* [5] M. Morise, &quot;Harvest: A high-performance fundamental frequency
* estimator from speech signals,&quot; Proc. of Interspeech, pp. 2321-2325,
* 2017.
*/
class PitchExtraction {
public:
/**
* Pitch extraction algorithms.
*/
enum Algorithms { kRapt = 0, kSwipe, kReaper, kWorld, kNumAlgorithms };
enum Algorithms {
  /** RAPT (robust algorithm for pitch tracking). */
  kRapt = 0,
  /** SWIPE. */
  kSwipe = 1,
  /** REAPER. */
  kReaper = 2,
  /** DIO, provided by WORLD. */
  kDio = 3,
  /** Harvest, provided by WORLD. */
  kHarvest = 4,
  /**
   * Number of algorithms. Keep this entry last so that it always follows the
   * final algorithm.
   *
   * NOTE(review): the numeric values above appear to be the ones given to the
   * -a option of the pitch command (0 to 4); confirm before renumbering.
   */
  kNumAlgorithms
};

/**
* @param[in] frame_shift Frame shift in point.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_ANALYSIS_PITCH_EXTRACTION_BY_WORLD_H_
#define SPTK_ANALYSIS_PITCH_EXTRACTION_BY_WORLD_H_
#ifndef SPTK_ANALYSIS_PITCH_EXTRACTION_BY_DIO_H_
#define SPTK_ANALYSIS_PITCH_EXTRACTION_BY_DIO_H_

#include <vector> // std::vector

Expand All @@ -25,9 +25,9 @@
namespace sptk {

/**
* Extract pitch based on WORLD (DUO).
* Extract pitch based on WORLD (DIO).
*/
class PitchExtractionByWorld : public PitchExtractionInterface {
class PitchExtractionByDio : public PitchExtractionInterface {
public:
/**
* @param[in] frame_shift Frame shift in point.
Expand All @@ -36,10 +36,10 @@ class PitchExtractionByWorld : public PitchExtractionInterface {
* @param[in] upper_f0 Upper bound of F0 in Hz.
* @param[in] voicing_threshold Threshold for determining voiced/unvoiced.
*/
PitchExtractionByWorld(int frame_shift, double sampling_rate, double lower_f0,
double upper_f0, double voicing_threshold);
PitchExtractionByDio(int frame_shift, double sampling_rate, double lower_f0,
double upper_f0, double voicing_threshold);

virtual ~PitchExtractionByWorld() {
virtual ~PitchExtractionByDio() {
}

/**
Expand Down Expand Up @@ -104,9 +104,9 @@ class PitchExtractionByWorld : public PitchExtractionInterface {

bool is_valid_;

DISALLOW_COPY_AND_ASSIGN(PitchExtractionByWorld);
DISALLOW_COPY_AND_ASSIGN(PitchExtractionByDio);
};

} // namespace sptk

#endif // SPTK_ANALYSIS_PITCH_EXTRACTION_BY_WORLD_H_
#endif // SPTK_ANALYSIS_PITCH_EXTRACTION_BY_DIO_H_
113 changes: 113 additions & 0 deletions include/SPTK/analysis/pitch_extraction_by_harvest.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// ------------------------------------------------------------------------ //
// Copyright 2021 SPTK Working Group //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_ANALYSIS_PITCH_EXTRACTION_BY_HARVEST_H_
#define SPTK_ANALYSIS_PITCH_EXTRACTION_BY_HARVEST_H_

#include <vector> // std::vector

#include "SPTK/analysis/pitch_extraction_interface.h"
#include "SPTK/utils/sptk_utils.h"

namespace sptk {

/**
* Extract pitch based on WORLD (Harvest).
*
* This class is one of the implementations of PitchExtractionInterface. The
* analysis conditions are stored in constant members, so they cannot be
* changed after construction; they are exposed through read-only accessors.
*/
class PitchExtractionByHarvest : public PitchExtractionInterface {
public:
/**
* @param[in] frame_shift Frame shift in point.
* @param[in] sampling_rate Sampling rate in Hz.
* @param[in] lower_f0 Lower bound of F0 in Hz.
* @param[in] upper_f0 Upper bound of F0 in Hz.
* @param[in] voicing_threshold Threshold for determining voiced/unvoiced.
*
* NOTE(review): The constructor is only declared here. Presumably the
* arguments are checked in the implementation file and the result is
* reported through IsValid(); confirm against the definition.
*/
PitchExtractionByHarvest(int frame_shift, double sampling_rate,
double lower_f0, double upper_f0,
double voicing_threshold);

/**
* Destructor. The body is empty because the class holds only scalar members.
*/
virtual ~PitchExtractionByHarvest() {
}

/**
* @return Frame shift in point.
*/
int GetFrameShift() const {
return frame_shift_;
}

/**
* @return Sampling rate in Hz.
*/
double GetSamplingRate() const {
return sampling_rate_;
}

/**
* @return Lower bound of F0 in Hz, i.e., the minimum fundamental frequency
*         to search for.
*/
double GetLowerF0() const {
return lower_f0_;
}

/**
* @return Upper bound of F0 in Hz, i.e., the maximum fundamental frequency
*         to search for.
*/
double GetUpperF0() const {
return upper_f0_;
}

/**
* @return Threshold for determining voiced/unvoiced.
*/
double GetVoicingThreshold() const {
return voicing_threshold_;
}

/**
* @return True if this object is valid.
*/
virtual bool IsValid() const {
return is_valid_;
}

/**
* Extract the pitch contour from a waveform. Only declared here; the
* definition lives in the implementation file.
*
* @param[in] waveform Waveform.
* @param[out] f0 Extracted pitch in Hz.
* @param[out] epochs Pitchmark (not used).
* @param[out] polarity Polarity (not used).
* @return True on success, false on failure.
*/
virtual bool Get(const std::vector<double>& waveform, std::vector<double>* f0,
std::vector<double>* epochs,
PitchExtractionInterface::Polarity* polarity) const;

private:
// Analysis conditions. They are const and therefore fixed once the object
// has been constructed.
const int frame_shift_;
const double sampling_rate_;
const double lower_f0_;
const double upper_f0_;
const double voicing_threshold_;

// Value returned by IsValid(). Unlike the members above it is not const.
// NOTE(review): presumably assigned in the constructor body; confirm.
bool is_valid_;

// NOTE(review): this macro is not defined in this header (presumably it comes
// from SPTK/utils/sptk_utils.h). Judging by its name it prohibits copying and
// assignment of this class; confirm against the macro definition.
DISALLOW_COPY_AND_ASSIGN(PitchExtractionByHarvest);
};

} // namespace sptk

#endif // SPTK_ANALYSIS_PITCH_EXTRACTION_BY_HARVEST_H_
12 changes: 9 additions & 3 deletions src/analysis/pitch_extraction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@

#include "SPTK/analysis/pitch_extraction.h"

#include "SPTK/analysis/pitch_extraction_by_dio.h"
#include "SPTK/analysis/pitch_extraction_by_harvest.h"
#include "SPTK/analysis/pitch_extraction_by_rapt.h"
#include "SPTK/analysis/pitch_extraction_by_reaper.h"
#include "SPTK/analysis/pitch_extraction_by_swipe.h"
#include "SPTK/analysis/pitch_extraction_by_world.h"

namespace sptk {

Expand All @@ -43,8 +44,13 @@ PitchExtraction::PitchExtraction(int frame_shift, double sampling_rate,
frame_shift, sampling_rate, lower_f0, upper_f0, voicing_threshold);
break;
}
case kWorld: {
pitch_extraction_ = new PitchExtractionByWorld(
case kDio: {
pitch_extraction_ = new PitchExtractionByDio(
frame_shift, sampling_rate, lower_f0, upper_f0, voicing_threshold);
break;
}
case kHarvest: {
pitch_extraction_ = new PitchExtractionByHarvest(
frame_shift, sampling_rate, lower_f0, upper_f0, voicing_threshold);
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// limitations under the License. //
// ------------------------------------------------------------------------ //

#include "SPTK/analysis/pitch_extraction_by_world.h"
#include "SPTK/analysis/pitch_extraction_by_dio.h"

#include <algorithm> // std::copy, std::fill
#include <cmath> // std::ceil
Expand All @@ -23,10 +23,10 @@

namespace sptk {

PitchExtractionByWorld::PitchExtractionByWorld(int frame_shift,
double sampling_rate,
double lower_f0, double upper_f0,
double voicing_threshold)
PitchExtractionByDio::PitchExtractionByDio(int frame_shift,
double sampling_rate,
double lower_f0, double upper_f0,
double voicing_threshold)
: frame_shift_(frame_shift),
sampling_rate_(sampling_rate),
lower_f0_(lower_f0),
Expand All @@ -41,7 +41,7 @@ PitchExtractionByWorld::PitchExtractionByWorld(int frame_shift,
}
}

bool PitchExtractionByWorld::Get(
bool PitchExtractionByDio::Get(
const std::vector<double>& waveform, std::vector<double>* f0,
std::vector<double>* epochs,
PitchExtractionInterface::Polarity* polarity) const {
Expand Down
Loading

0 comments on commit d915c6c

Please sign in to comment.