Skip to content

Commit

Permalink
add ppgvc_f0 as prosodic_encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
MingjieChen committed Feb 15, 2023
1 parent ee5e6e3 commit 8647381
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 80 deletions.
22 changes: 22 additions & 0 deletions bin/feature_extraction.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Run feature_extraction.py once per dataset split for the feature type
# configured below.
#
# All data/, dump/ and configs/ paths, and feature_extraction.py itself, are
# resolved relative to the current working directory.

conda=/share/mini1/sw/std/python/anaconda3-2019.07/v3.7
conda_env=torch_1.7

# Stop immediately if the environment cannot be activated; otherwise the
# extraction would silently run under whatever python3 is first on PATH.
# Strict mode is switched on only afterwards, since the activate script is
# third-party code that may not tolerate `set -u` (assumption, not verified).
source "$conda/bin/activate" "$conda_env" || {
  echo "[feature extraction]: cannot activate conda env '$conda_env' from $conda" >&2
  exit 1
}

set -euo pipefail

dataset=vctk
config=configs/preprocess_vctk_ppgvc_mel.yaml
feature_type=ppgvc_f0
# An array instead of a whitespace-separated string, so iterating over it
# does not rely on word splitting.
splits=(train_nodev_all dev_all)

for split in "${splits[@]}"; do
  echo "[feature extraction]: $split $dataset $feature_type"
  # Every continuation backslash is preceded by a space. Without it the
  # backslash-newline joins the argument to the next option, turning the
  # config path into "<config>--split" and dropping the --split flag.
  python3 feature_extraction.py \
    --metadata "data/$dataset/$split/metadata.csv" \
    --dump_dir "dump/$dataset" \
    --config_path "$config" \
    --split "$split" \
    --feature_type "$feature_type" \
    --max_workers 20
done
18 changes: 0 additions & 18 deletions bin/feature_extraction_libritts.sh

This file was deleted.

32 changes: 0 additions & 32 deletions bin/feature_extraction_libritts_multi_jobs.sh

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ conda_env=torch_1.7

dataset=vctk
config=configs/preprocess_vctk_ppgvc_mel.yaml
feature_type=ppgvc_mel
feature_type=ppgvc_f0
splits="train_nodev_all dev_all"

script_dir=scripts/$dataset/preprocess
Expand All @@ -16,15 +16,15 @@ script_dir=scripts/$dataset/preprocess

for split in $splits ; do

echo "[feature extraction]: $split for $dataset"
echo "[feature extraction]: $split $dataset $feature_type"
speakers=$(cat data/$dataset/$split/speakers.txt)
for spk in $speakers ; do
b=$script_dir/feature_extraction_${split}_${spk}.sh
l=logs/feature_extraction_${split}.${spk}.log
b=$script_dir/feature_extraction_${feature_type}_${split}_${spk}.sh
l=logs/feature_extraction_${feature_type}_${split}_${spk}.log
cat <<EOF > $b
#!/bin/bash
source $conda/bin/activate $conda_env
python3 preprocess/feature_extraction.py \
python3 feature_extraction.py \
--metadata data/$dataset/$split/metadata.csv \
--dump_dir dump/$dataset \
--config_path $config \
Expand All @@ -35,6 +35,6 @@ python3 preprocess/feature_extraction.py \
EOF
chmod +x $b
submitjob -m 10000 $l $b
echo "submitjob for $dataset $split $spk"
echo "submitjob for $dataset $split $spk $feature_type"
done
done
18 changes: 0 additions & 18 deletions bin/feature_extraction_vctk.sh

This file was deleted.

8 changes: 5 additions & 3 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import random
from torch.utils.data import DataLoader
from collections import defaultdict
from prosodic_encoder.ppgvc_f0.ppgvc_lf0 import get_cont_lf0 as process_ppgvc_f0
def get_dataloader(config):
train_dataset = Dataset(config, config['train_meta'], config['train_set'])
dev_dataset = Dataset(config, config['dev_meta'], config['dev_set'])
Expand Down Expand Up @@ -111,9 +112,9 @@ def __init__(self, config, metadata_csv, split):
self.ling_rep_dir = os.path.join(config['dump_dir'], config['dataset'], split, self.ling_enc)
self.spk_enc = config['spk_enc']
self.spk_emb_dir = os.path.join(config['dump_dir'], config['dataset'], split, self.spk_enc)
self.pros_enc = config['pros_enc']
self.pros_enc = config['pros_enc'] #e.g. ppgvc_f0
self.pros_rep_dir = os.path.join(config['dump_dir'], config['dataset'], split, self.pros_enc)

self.pros_rep_process_func = f'process_{self.pros_enc}'
# frames per step (only works for TacoMOL)
self.frames_per_step = config['frames_per_step'] if 'frames_per_step' in config else 1

Expand Down Expand Up @@ -146,7 +147,8 @@ def __getitem__(self, idx):
ling_rep = np.load(ling_rep_path)
ling_duration = ling_rep.shape[0]
spk_emb = np.load(spk_emb_path)
pros_rep = np.expand_dims(np.load(pros_rep_path), axis = 1)
pros_rep = np.load(pros_rep_path)
pros_rep = eval(self.pros_rep_process_func)(pros_rep)
pros_duration = pros_rep.shape[0]

# up_sample ling_rep to 10hz, in case some ling_rep are 50hz or 25hz.
Expand Down
2 changes: 1 addition & 1 deletion preprocess/feature_extraction.py → feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import os
import glob
from audio_utils import mel_spectrogram, normalize
from preprocess.audio_utils import mel_spectrogram, normalize
from prosodic_encoder.ppgvc_f0.ppgvc_lf0 import compute_f0 as compute_ppgvc_f0
import pyworld as pw
import librosa
Expand Down
3 changes: 2 additions & 1 deletion prosodic_encoder/ppgvc_f0/ppgvc_lf0.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,5 @@ def get_cont_lf0(f0, frame_period=10.0, lpf=False):
nonzero_indices = np.nonzero(cont_f0)
cont_lf0 = cont_f0.copy()
cont_lf0[cont_f0>0] = np.log(cont_f0[cont_f0>0])
return uv, cont_lf0
lf0_uv = np.concatenate([cont_lf0[:, np.newaxis], uv[:, np.newaxis]], axis=1)
return lf0_uv
2 changes: 1 addition & 1 deletion submit_train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ config=configs/${dataset}_${ling}_${spk}_${pros}_${dec}.yaml
exp_dir=exp
model_name=${ling}_${spk}_${pros}_${dec}
exp=$exp_dir/$model_name/$exp_name
njobs=1
njobs=12
ngpus=2
slots=8
#gputypes="GeForceRTX3060|GeForceRTX3090"
Expand Down

0 comments on commit 8647381

Please sign in to comment.