Remove pitch from nnet3 recipe #3686

Merged: 6 commits, merged Oct 28, 2019
Changes from 1 commit
remove pitch from nnet3 chain
naxingyu committed Oct 12, 2019
commit 5c59c8c84fd499fd5e8126cc11a384199f60026f
33 changes: 13 additions & 20 deletions egs/multi_cn/s5/local/chain/run_ivector_common.sh
@@ -75,29 +75,22 @@ if [ $stage -le 3 ]; then

# do volume-perturbation on the training data prior to extracting hires
# features; this helps make trained nnets more invariant to test data volume.
# create MFCC data dir without pitch to extract iVector
utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
steps/make_mfcc_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/${train_set}_sp_hires || exit 1;
steps/compute_cmvn_stats.sh data/${train_set}_sp_hires || exit 1;
utils/fix_data_dir.sh data/${train_set}_sp_hires
utils/data/limit_feature_dim.sh 0:39 \
data/${train_set}_sp_hires data/${train_set}_sp_hires_nopitch || exit 1;
steps/compute_cmvn_stats.sh data/${train_set}_sp_hires_nopitch || exit 1;

for datadir in $test_sets; do
steps/make_mfcc_pitch_online.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
steps/make_mfcc_online.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/$datadir/test_hires || exit 1;
steps/compute_cmvn_stats.sh data/$datadir/test_hires || exit 1;
utils/fix_data_dir.sh data/$datadir/test_hires
utils/data/limit_feature_dim.sh 0:39 \
data/$datadir/test_hires data/$datadir/test_hires_nopitch || exit 1;
steps/compute_cmvn_stats.sh data/$datadir/test_hires_nopitch || exit 1;
done

# now create a data subset. 60k is 1/5th of the training dataset (around 200 hours).
utils/subset_data_dir.sh data/${train_set}_sp_hires_nopitch 60000 \
data/${train_set}_sp_hires_nopitch_60k
utils/subset_data_dir.sh data/${train_set}_sp_hires 60000 \
data/${train_set}_sp_hires_60k
fi


@@ -107,24 +100,24 @@ if [ $stage -le 4 ]; then
mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm

num_utts_total=$(wc -l <data/${train_set}_sp_hires_nopitch/utt2spk)
num_utts_total=$(wc -l <data/${train_set}_sp_hires/utt2spk)
num_utts=$[$num_utts_total/100]
utils/data/subset_data_dir.sh data/${train_set}_sp_hires_nopitch \
$num_utts ${temp_data_root}/${train_set}_sp_hires_nopitch_subset
utils/data/subset_data_dir.sh data/${train_set}_sp_hires \
$num_utts ${temp_data_root}/${train_set}_sp_hires_subset

echo "$0: computing a PCA transform from the hires data."
steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \
--splice-opts "--left-context=3 --right-context=3" \
--max-utts 10000 --subsample 2 \
${temp_data_root}/${train_set}_sp_hires_nopitch_subset \
${temp_data_root}/${train_set}_sp_hires_subset \
exp/nnet3${nnet3_affix}/pca_transform

echo "$0: training the diagonal UBM."
# Use 512 Gaussians in the UBM.
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
--num-frames 700000 \
--num-threads $num_threads_ubm \
${temp_data_root}/${train_set}_sp_hires_nopitch_subset 512 \
${temp_data_root}/${train_set}_sp_hires_subset 512 \
exp/nnet3${nnet3_affix}/pca_transform exp/nnet3${nnet3_affix}/diag_ubm
fi

@@ -135,7 +128,7 @@ if [ $stage -le 5 ]; then
# we use just the 60k subset (about one fifth of the data, or 200 hours).
echo "$0: training the iVector extractor"
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
--num-processes $num_processes data/${train_set}_sp_hires_nopitch_60k \
--num-processes $num_processes data/${train_set}_sp_hires_60k \
exp/nnet3${nnet3_affix}/diag_ubm exp/nnet3${nnet3_affix}/extractor || exit 1;
fi

@@ -154,18 +147,18 @@ if [ $stage -le 6 ]; then
# having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (iVector starts at zero).
utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \
data/${train_set}_sp_hires_nopitch ${ivectordir}/${train_set}_sp_hires_nopitch_max2
data/${train_set}_sp_hires ${ivectordir}/${train_set}_sp_hires_max2

steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 60 \
${ivectordir}/${train_set}_sp_hires_nopitch_max2 exp/nnet3${nnet3_affix}/extractor \
${ivectordir}/${train_set}_sp_hires_max2 exp/nnet3${nnet3_affix}/extractor \
$ivectordir || exit 1;
fi

if [ $stage -le 7 ]; then
echo "$0: extracting iVectors for test data"
for data in $test_sets; do
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 10 \
data/${data}/test_hires_nopitch exp/nnet3${nnet3_affix}/extractor \
data/${data}/test_hires exp/nnet3${nnet3_affix}/extractor \
exp/nnet3${nnet3_affix}/ivectors_${data}_hires || exit 1;
done
fi
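For orientation, here is a hedged sketch of how the feature-extraction stage reads after this change: one hires MFCC pass per data dir, with no separate *_nopitch copies and no utils/data/limit_feature_dim.sh step. The sketch uses the standard steps/make_mfcc.sh, whereas the commit itself calls steps/make_mfcc_online.sh; data-dir names follow the recipe's own variables.

# Hedged sketch (not part of the diff): simplified stage-3 flow without pitch.
utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
  --cmd "$train_cmd" data/${train_set}_sp_hires
steps/compute_cmvn_stats.sh data/${train_set}_sp_hires
utils/fix_data_dir.sh data/${train_set}_sp_hires
# The same hires dir now feeds the subset, PCA, UBM and iVector steps directly.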
4 changes: 2 additions & 2 deletions egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -126,7 +126,7 @@ if [ $stage -le 14 ]; then

cat <<EOF > $dir/configs/network.xconfig
input dim=100 name=ivector
input dim=43 name=input
input dim=40 name=input

# MFCC to filterbank
idct-layer name=idct input=input dim=40 cepstral-lifter=22 affine-transform-file=$dir/configs/idct.mat
@@ -237,7 +237,7 @@ if $test_online_decoding && [ $stage -le 18 ]; then
# note: if the features change (e.g. you add pitch features), you will have to
# change the options of the following command line.
steps/online/nnet3/prepare_online_decoding.sh \
--mfcc-config conf/mfcc_hires.conf --add-pitch true \
--mfcc-config conf/mfcc_hires.conf \
$lang exp/nnet3${nnet3_affix}/extractor $dir ${dir}_online

rm $dir/.error 2>/dev/null || true
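The two edits in this file track the same front-end change: Kaldi's online pitch extractor appends 3 dimensions (POV/NCCF feature, normalized log-pitch, delta-pitch) on top of the 40 hires MFCCs, so removing pitch drops the network input from 43 to 40 and makes --add-pitch unnecessary in prepare_online_decoding.sh. The 40-dim figure assumes a typical conf/mfcc_hires.conf along the following lines; the actual config in this recipe may differ slightly.

# Assumed hires MFCC config (illustrative, not taken from this PR):
--use-energy=false        # no energy term; pure cepstra
--num-mel-bins=40         # 40 mel bins
--num-ceps=40             # keep all 40 cepstra -> 40-dim features
--low-freq=20
--high-freq=-400          # i.e. Nyquist minus 400 Hz
--sample-frequency=16000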
17 changes: 2 additions & 15 deletions egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
@@ -5,19 +5,6 @@
# 1b is as 1a but adding SpecAugment and removing dropout (which, in
# combination with SpecAugment, no longer seemed to give an improvement).

# local/chain/compare_wer.sh --online exp/chain/cnn_tdnn1{a,a2,b,b2}_sp
# System cnn_tdnn1a_sp cnn_tdnn1a2_sp cnn_tdnn1b_sp cnn_tdnn1b2_sp
#WER dev_clean_2 (tgsmall) 10.89 10.96 10.04 9.93
# [online:] 10.91 10.93 9.99 9.99
#WER dev_clean_2 (tglarge) 7.50 7.80 6.94 6.89
# [online:] 7.58 7.84 6.97 7.04
# Final train prob -0.0476 -0.0470 -0.0577 -0.0575
# Final valid prob -0.0754 -0.0760 -0.0742 -0.0746
# Final train prob (xent) -1.0930 -1.0995 -1.3090 -1.3043
# Final valid prob (xent) -1.2916 -1.2904 -1.4242 -1.4225
# Num-params 4492816 4492816 4492816 4492816


# Set -e here so that we catch if any executable fails immediately
set -euo pipefail

@@ -120,7 +107,7 @@ if [ $stage -le 14 ]; then
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=100 name=ivector
input dim=43 name=input
input dim=40 name=input

# this takes the MFCCs and generates filterbank coefficients. The MFCCs
# are more compressible so we prefer to dump the MFCCs to disk rather
@@ -236,7 +223,7 @@ if $test_online_decoding && [ $stage -le 18 ]; then
# note: if the features change (e.g. you add pitch features), you will have to
# change the options of the following command line.
steps/online/nnet3/prepare_online_decoding.sh \
--mfcc-config conf/mfcc_hires.conf --add-pitch true \
--mfcc-config conf/mfcc_hires.conf \
$lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online

rm $dir/.error 2>/dev/null || true
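This file gets the same pair of edits as run_cnn_tdnn_1a.sh. A quick sanity check after re-extracting features is to confirm the dimension the network will actually see; a hedged example using Kaldi's feat-to-dim (path illustrative), which should print 40 once pitch is gone and 43 on the old pitch-augmented features.

# Hedged check (not part of the diff): print the hires feature dimension.
. ./path.sh
feat-to-dim scp:data/${train_set}_sp_hires/feats.scp -
# expected output after this change: 40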