Skip to content

Commit

Permalink
mean var normalization
Browse files: browse the repository at this point in the history
  • Loading branch information
MingjieChen committed Feb 13, 2023
1 parent e2137ee commit d9b18df
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 22 deletions.
13 changes: 7 additions & 6 deletions bin/normalize.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ conda=/share/mini1/sw/std/python/anaconda3-2019.07/v3.7
conda_env=torch_1.7
source $conda/bin/activate $conda_env


python preprocess/normalize.py \
--stats_path dump/libritts/train_nodev_clean/mel/train_nodev_clean.npy \
--dump_dir dump/libritts/ \
--split train_nodev \
--metadata data/libritts/train_nodev_clean/metadata.csv
for split in train_nodev_clean dev_clean ;do
python preprocess/normalize.py \
--stats_path dump/libritts/train_nodev_clean/mel/train_nodev_clean.npy \
--dump_dir dump/libritts/ \
--split $split \
--metadata data/libritts/$split/metadata.csv
done


6 changes: 3 additions & 3 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,16 @@ def __init__(self, config, metadata_csv, split):
# remove utterances that are too long for training.
if config['rm_long_utt']:
_duration = row['duration']
if float(_duration) <= config['max_utt_duration']:
if float(_duration) < config['max_utt_duration']:
self.metadata.append(row)
f.close()


print(f'{split} data samples {len(self.metadata)}')
self.batch_size = config['batch_size']
self.drop_last = config['drop_last']
self.sort = config['sort']
# feature dirs
self.mel_dir = os.path.join(config['dump_dir'], config['dataset'], split, 'mel')
self.mel_dir = os.path.join(config['dump_dir'], config['dataset'], split, 'norm_mel')

self.ling_enc = config['ling_enc']
self.ling_rep_dir = os.path.join(config['dump_dir'], config['dataset'], split, self.ling_enc)
Expand Down
2 changes: 1 addition & 1 deletion inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def load_wav(path, sample_rate = 16000):

print(f'generating {len(eval_list)} samples')
# split eval_list by sge_job_idx
n_per_task = round(len(eval_list) / args.sge_n_tasks, 0)
n_per_task = np.ceil(len(eval_list) / args.sge_n_tasks)
start = int(( args.sge_task_id -1 ) * n_per_task)
if int( args.sge_task_id * n_per_task) >= len(eval_list):
end = len(eval_list) -1
Expand Down
8 changes: 4 additions & 4 deletions preprocess/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@

# load stats
scaler = StandardScaler()
scaler.mean_ = np.load(args.stats)[0]
scaler.scale_ = np.load(args.stats)[1]
scaler.mean_ = np.load(args.stats_path)[0]
scaler.scale_ = np.load(args.stats_path)[1]
scaler.n_features_in_ = scaler.mean_.shape[0]


Expand All @@ -43,13 +43,13 @@
spk = _meta['spk']
mel_path = os.path.join(args.dump_dir, args.split, 'mel', spk, ID + '.npy')
norm_path = os.path.join(args.dump_dir, args.split, 'norm_mel', spk, ID + '.npy')
os.makedirs(os.path.basename(norm_path), exists_ok = True)
os.makedirs(os.path.dirname(norm_path), exist_ok = True)

mel = np.load(mel_path)
norm_mel = scaler.transform(mel)
np.save(
norm_path,
mel.astype(np.float32),
norm_mel.astype(np.float32),
allow_pickle=False,
)

Expand Down
16 changes: 8 additions & 8 deletions submit_train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,26 @@ conda=/share/mini1/sw/std/python/anaconda3-2019.07/v3.7
conda_env=torch_1.9

#exp setup
#ling=vqw2v
ling=vqw2v
#ling=conformerppg
ling=hubertsoft
#ling=hubertsoft
spk=uttdvec
pros=none
#dec=fastspeech2
dec=fastspeech2
#dec=tacoar
dec=tacomol
#dec=tacomol

exp_name=first_train
exp_name=libritts_24khz_10ms
config=configs/${ling}_${spk}_${pros}_${dec}.yaml
exp_dir=exp
model_name=${ling}_${spk}_${pros}_${dec}
exp=$exp_dir/$model_name/$exp_name
njobs=24
njobs=1
ngpus=2
slots=8
#gputypes="GeForceRTX3060|GeForceRTX3090"
#gputypes="GeForceRTX3090"
gputypes="GeForceGTXTITANX|GeForceGTX1080Ti|GeForceRTX3060"
gputypes="GeForceRTX3090"
#gputypes="GeForceGTXTITANX|GeForceGTX1080Ti|GeForceRTX3060"

# create exp dir
[ ! -e $exp ] && mkdir -p $exp
Expand Down

0 comments on commit d9b18df

Please sign in to comment.