
Commit

update evaluation
MingjieChen committed Jun 9, 2023
1 parent 9cd4d48 commit 030a126
Showing 30 changed files with 1,669 additions and 124 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -18,6 +18,8 @@ pretrained_models
 # parts that are not public yet
 evaluation/UTMOS-demo
 evaluation/eval_list*.txt
+evaluation/speechbrain_asr_model_weights
+results/



8 changes: 4 additions & 4 deletions bin/feature_extraction_multi_jobs.sh
@@ -5,10 +5,10 @@ conda_env=torch_1.7

 # setup

-dataset=vctk
-config=configs/preprocess_bigvgan_mel.yaml
-feature_type=bigvgan_mel
-splits="train_nodev_all dev_all"
+dataset=libritts
+config=configs/preprocess_ppgvc_f0.yaml
+feature_type=ppgvc_f0
+splits="train_nodev_clean dev_clean"

 script_dir=scripts/$dataset/preprocess
38 changes: 38 additions & 0 deletions bin/feature_extraction_sge_multi_tasks.sh
@@ -0,0 +1,38 @@
#!/bin/bash

conda=/share/mini1/sw/std/python/anaconda3-2019.07/v3.7
conda_env=torch_1.9

# setup

dataset=libritts
config=configs/preprocess_ppgvc_mel.yaml
feature_type=ppgvc_mel
splits="train_nodev_clean dev_clean"

script_dir=scripts/$dataset/preprocess

[ ! -e $script_dir ] && mkdir -p $script_dir

for split in $splits ; do

echo "[feature extraction]: $split $dataset $feature_type"
b=$script_dir/feature_extraction_${feature_type}_${split}.sh
l=logs/feature_extraction_${feature_type}_${split}.log
cat <<EOF > $b
#!/bin/bash
source $conda/bin/activate $conda_env
python3 feature_extraction.py \
--metadata data/$dataset/$split/metadata.csv \
--dump_dir dump/$dataset \
--config_path $config \
--split $split \
--max_workers 20 \
--feature_type $feature_type \
--sge_task_id \$SGE_TASK_ID \
--sge_n_tasks 5000
EOF
chmod +x $b
submitjob -m 10000 -n 5000 $l $b
echo "submitjob for $dataset $split $feature_type see log $l"
done
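
The generated job file is submitted as a 5000-task SGE array job, and feature_extraction.py receives --sge_task_id/--sge_n_tasks, which suggests each array task processes a disjoint slice of the metadata. A minimal sketch of that sharding, assuming a strided split (only the flag names come from the script above; the slicing logic is an assumption):

# Hypothetical sketch of how feature_extraction.py might shard metadata rows
# across SGE array tasks; only the flag names come from the script above.
import argparse
import csv

parser = argparse.ArgumentParser()
parser.add_argument("--metadata", required=True)
parser.add_argument("--sge_task_id", type=int, default=1)  # SGE task ids are 1-based
parser.add_argument("--sge_n_tasks", type=int, default=1)
args = parser.parse_args()

with open(args.metadata) as f:
    rows = list(csv.reader(f))

# Strided split: task i processes rows i-1, i-1+n, i-1+2n, ...
my_rows = rows[args.sge_task_id - 1 :: args.sge_n_tasks]
for row in my_rows:
    ...  # extract features for this utterance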
6 changes: 3 additions & 3 deletions bin/generate_eval_list.sh
@@ -1,9 +1,9 @@
 #!/bin/bash

 task=vc
-dataset=vctk
-split=eval_all
-eval_list=eval_list_m2m_vc_small_oneshot.json
+dataset=libritts
+split=eval_clean
+eval_list=eval_list_a2a_vc_small_oneshot.json
 n_trg_spk_samples=1
 n_src_spk_samples=4
 n_eval_spks=10
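
The new a2a (any-to-any) setting samples n_eval_spks=10 evaluation speakers, with n_trg_spk_samples=1 reference utterance per target speaker (one-shot) and n_src_spk_samples=4 source utterances per speaker. A hedged sketch of how such an eval list could be assembled; the JSON field names and the pairing scheme are assumptions, not necessarily how this repo's script builds it:

# Hypothetical sketch of building an any-to-any one-shot eval list from the
# variables above; field names and pairing scheme are assumptions.
import json
import random

def build_eval_list(spk2utts, n_eval_spks=10, n_src_spk_samples=4,
                    n_trg_spk_samples=1, seed=1234):
    random.seed(seed)
    spks = random.sample(sorted(spk2utts), k=min(n_eval_spks, len(spk2utts)))
    pairs = []
    for src in spks:
        for trg in spks:
            if src == trg:
                continue  # a2a: only convert across speakers
            for src_utt in random.sample(spk2utts[src], n_src_spk_samples):
                for trg_utt in random.sample(spk2utts[trg], n_trg_spk_samples):
                    pairs.append({"src_spk": src, "trg_spk": trg,
                                  "src_wav": src_utt, "trg_wav": trg_utt})
    return pairs

# Tiny demo with two speakers and four utterances each:
demo = {s: [f"{s}_{i:03d}.wav" for i in range(4)] for s in ("spk_a", "spk_b")}
print(json.dumps(build_eval_list(demo, n_eval_spks=2), indent=2)[:200])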
89 changes: 89 additions & 0 deletions (new config file; filename not shown in this view)
@@ -0,0 +1,89 @@
# experiment
dataset: libritts
train_meta: data/libritts/train_nodev_clean/metadata.csv
dev_meta: data/libritts/dev_clean/metadata.csv
train_set: train_nodev_clean
dev_set: dev_clean


# encoder-decoder
ling_enc: conformer_ppg
spk_enc: utt_dvec
pros_enc: ppgvc_f0
decoder: FastSpeech2
mel_type: ppgvc_mel
vocoder: ppgvc_hifigan

# training
fp16_run: !!bool True
epochs: 200
save_freq: 2 # save ckpt frequency
show_freq: 100 # show training information frequency
load_only_params: !!bool False
seed: !!int 1234
trainer: FS2Trainer
ngpu: 1

#dataloader
dataset_class: Dataset
sort: !!bool False
dump_dir: dump
num_workers: !!int 8
batch_size: 32
drop_last: !!bool True
rm_long_utt: !!bool True # remove too long utterances from metadata
max_utt_duration: !!float 10.0 # max utterance duration (seconds)


# decoder params
decoder_params:
  out_dim: 80
  max_len: 1000
  max_seq_len: 1000
  spk_emb_dim: 256
  prosodic_rep_type: continuous
  prosodic_net:
    hidden_dim: 256
  prenet:
    conv_kernel_size: 3
    input_dim: 144
    dropout: 0.1
  postnet:
    idim: 80
    odim: 80
    n_layers: 0
    n_filts: 5
    n_chans: 256
    dropout_rate: 0.5
  transformer:
    encoder_layer: 4
    encoder_head: 2
    encoder_hidden: 256
    decoder_layer: 4
    decoder_head: 2
    decoder_hidden: 256
    conv_filter_size: 1024
    conv_kernel_size: [3, 1]
    encoder_dropout: 0.1
    decoder_dropout: 0.1

# optimizer & scheduler
optimizer:
  init_lr: !!float 1e-2
  betas: [0.9, 0.99]
  weight_decay: 0.0
scheduler:
  warm_up_step: 4000
  anneal_steps: [800000, 900000, 1000000]
  anneal_rate: 0.3

# loss hyper-parameters
losses:
  alpha: 1.
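
The scheduler block (warm_up_step, anneal_steps, anneal_rate) together with init_lr reads like the usual FastSpeech2 recipe: Noam-style warmup followed by step-wise annealing. A sketch of the implied learning-rate curve, assuming that standard formula (the exact rule inside FS2Trainer is not shown in this diff):

# Hypothetical reconstruction of the LR schedule implied by the config above:
# Noam-style warmup, then multiply by anneal_rate at each anneal milestone.
def learning_rate(step, init_lr=1e-2, warm_up_step=4000,
                  anneal_steps=(800000, 900000, 1000000), anneal_rate=0.3):
    step = max(step, 1)  # steps are 1-based; avoids 0 ** -0.5
    lr = init_lr * min(step ** -0.5, step * warm_up_step ** -1.5)
    for s in anneal_steps:
        if step > s:
            lr *= anneal_rate  # scale by 0.3 past each milestone
    return lr

print(learning_rate(4000))    # peak LR at the end of warmup
print(learning_rate(850000))  # reduced by 0.3 after the first anneal step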
78 changes: 78 additions & 0 deletions (new config file; filename not shown in this view)
@@ -0,0 +1,78 @@
# experiment
dataset: libritts
train_meta: data/libritts/train_nodev_clean/metadata.csv
dev_meta: data/libritts/dev_clean/metadata.csv
train_set: train_nodev_clean
dev_set: dev_clean


# encoder-decoder
ling_enc: conformer_ppg
spk_enc: utt_dvec
pros_enc: ppgvc_f0
decoder: TacoAR
mel_type: ppgvc_mel
vocoder: ppgvc_hifigan


# training
fp16_run: !!bool True
epochs: 200
save_freq: 2 # save ckpt frequency
show_freq: 10
load_only_params: !!bool False
seed: !!int 1234
trainer: TacoARTrainer
ngpu: 2

#dataloader
dataset_class: Dataset
sort: !!bool True
dump_dir: dump
num_workers: !!int 8
batch_size: 64
drop_last: !!bool True
rm_long_utt: !!bool True # remove too long utterances from metadata
max_utt_duration: !!float 10.0 # max utterance duration (seconds)


# decoder params
decoder_params:
  prosodic_rep_type: continuous
  prosodic_net:
    hidden_dim: 1024
  input_dim: 144
  output_dim: 80
  resample_ratio: 1
  spk_emb_integration_type: concat  # add or concat
  spk_emb_dim: 256
  ar: True
  encoder_type: "taco2"
  hidden_dim: 1024
  prenet_layers: 2  # if set 0, no prenet is used
  prenet_dim: 256
  prenet_dropout_rate: 0.5
  lstmp_layers: 2
  lstmp_dropout_rate: 0.2
  lstmp_proj_dim: 256
  lstmp_layernorm: False

# optimizer & scheduler
optimizer:
  weight_decay: 0.0
  betas: [0.9, 0.99]
  lr: !!float 1e-4
scheduler:
  num_training_steps: 500000
  num_warmup_steps: 4000

# loss hyper-parameters
losses:
  alpha: 1.
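
Per the config's own comment, spk_emb_integration_type supports "add or concat": the utterance-level d-vector is repeated over time and either concatenated to the decoder's hidden sequence or projected and added. A minimal PyTorch sketch of the two modes, assuming ESPnet-style integration (module name and shapes are assumptions):

# Hypothetical sketch of the two speaker-embedding integration modes named in
# the config ("add or concat"); layer names and shapes are assumptions.
import torch
import torch.nn as nn

class SpkIntegration(nn.Module):
    def __init__(self, hidden_dim=1024, spk_emb_dim=256, mode="concat"):
        super().__init__()
        self.mode = mode
        if mode == "concat":
            # Project [hidden; spk_emb] back down to hidden_dim.
            self.proj = nn.Linear(hidden_dim + spk_emb_dim, hidden_dim)
        else:  # "add"
            self.proj = nn.Linear(spk_emb_dim, hidden_dim)

    def forward(self, hs, spk_emb):
        # hs: (B, T, hidden_dim); spk_emb: (B, spk_emb_dim)
        e = spk_emb.unsqueeze(1).expand(-1, hs.size(1), -1)
        if self.mode == "concat":
            return self.proj(torch.cat([hs, e], dim=-1))
        return hs + self.proj(e)

x = SpkIntegration()(torch.randn(2, 50, 1024), torch.randn(2, 256))
print(x.shape)  # torch.Size([2, 50, 1024])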
67 changes: 67 additions & 0 deletions (new config file; filename not shown in this view)
@@ -0,0 +1,67 @@
# experiment
dataset: libritts
train_meta: data/libritts/train_nodev_clean/metadata.csv
dev_meta: data/libritts/dev_clean/metadata.csv
train_set: train_nodev_clean
dev_set: dev_clean


# encoder-decoder
ling_enc: conformer_ppg
spk_enc: utt_dvec
pros_enc: ppgvc_f0
decoder: TacoMOL
mel_type: ppgvc_mel
vocoder: ppgvc_hifigan


# training
fp16_run: !!bool True
epochs: 200
save_freq: 2 # save ckpt frequency
show_freq: 10
load_only_params: !!bool False
seed: !!int 1234
trainer: TacoMOLTrainer
ngpu: 2

#dataloader
dataset_class: Dataset
sort: !!bool True
dump_dir: dump
num_workers: !!int 8
batch_size: 64
drop_last: !!bool True
rm_long_utt: !!bool True # remove too long utterances from metadata
max_utt_duration: !!float 10.0 # max utterance duration (seconds)
frames_per_step: !!int 4


# decoder params
decoder_params:
  out_dim: 80
  prosodic_rep_type: continuous
  prosodic_net:
    hidden_dim: 256
  spk_embed_dim: 256
  bottle_neck_feature_dim: 144

# optimizer & scheduler
optimizer:
  weight_decay: !!float 1e-6
  betas: [0.9, 0.99]
  lr: !!float 1e-4
scheduler:
  num_training_steps: 500000
  num_warmup_steps: 4000

# loss hyper-parameters
losses:
  alpha: 1.
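
frames_per_step: 4 in this config is the classic Tacotron reduction factor: the autoregressive decoder emits 4 mel frames per step, so the loader pads each utterance to a multiple of 4 and regroups frames. A small illustration of that grouping, assuming the standard scheme (not taken from this repo's code):

# Hypothetical illustration of frames_per_step (the Tacotron reduction
# factor): group r consecutive mel frames into one decoder step.
import numpy as np

def group_frames(mel, r=4):
    """mel: (T, 80) -> (ceil(T/r), r * 80), zero-padding the tail."""
    T, D = mel.shape
    pad = (-T) % r
    mel = np.pad(mel, ((0, pad), (0, 0)))
    return mel.reshape(-1, r * D)

mel = np.random.randn(103, 80).astype(np.float32)
print(group_frames(mel).shape)  # (26, 320): 26 decoder steps of 4 frames each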