[egs] Update WSJ flat-start chain recipes to use TDNN-F not TDNN+LSTM

hhadian authored and danpovey committed Jan 12, 2019
1 parent 9b6fbdd commit c017268
Showing 4 changed files with 295 additions and 59 deletions.
91 changes: 50 additions & 41 deletions egs/wsj/s5/local/chain/e2e/run_tdnn_flatstart.sh
@@ -3,33 +3,31 @@

# This script performs chain training in a flat-start manner
# and without building or using any context-dependency tree.
# It does not use ivectors or other forms of speaker adaptation
# except simple mean and variance normalization.
# It does not use ivectors or other forms of speaker adaptation.
# It is called from run_e2e_phone.sh

# Note: this script is configured as phone-based, if you want
# to run it in character mode, you'll need to change _nosp
# to _char everywhere and also copy char_lm.fst instead
# of phone_lm.fst (in stage 1 below)

# local/chain/compare_wer.sh exp/chain/e2e_tdnn_1a
# System e2e_tdnn_1a
#WER dev93 (tgpr) 9.63
#WER dev93 (tg) 9.07
#WER dev93 (big-dict,tgpr) 7.41
#WER dev93 (big-dict,fg) 6.55
#WER eval92 (tgpr) 5.90
#WER eval92 (tg) 5.17
#WER eval92 (big-dict,tgpr) 3.56
#WER eval92 (big-dict,fg) 2.85
# Final train prob -0.0726
# Final valid prob -0.0884
# to _char everywhere.

# local/chain/compare_wer.sh exp/chain/e2e_tdnnf_1a
# System e2e_tdnnf_1a
#WER dev93 (tgpr) 8.77
#WER dev93 (tg) 8.11
#WER dev93 (big-dict,tgpr) 6.17
#WER dev93 (big-dict,fg) 5.66
#WER eval92 (tgpr) 5.62
#WER eval92 (tg) 5.19
#WER eval92 (big-dict,tgpr) 3.23
#WER eval92 (big-dict,fg) 2.80
# Final train prob -0.0618
# Final valid prob -0.0825
# Final train prob (xent)
# Final valid prob (xent)
# Num-params 3740934
# Num-params 6772564

# steps/info/chain_dir_info.pl exp/chain/e2e_tdnn_1a
# exp/chain/e2e_tdnn_1a: num-iters=102 nj=2..5 num-params=3.7M dim=40->84 combine=-0.117->-0.116 (over 3) logprob:train/valid[67,101,final]=(-0.080,-0.073,-0.073/-0.090,-0.089,-0.088)
# steps/info/chain_dir_info.pl exp/chain/e2e_tdnnf_1a
# exp/chain/e2e_tdnnf_1a: num-iters=180 nj=2..8 num-params=6.8M dim=40->84 combine=-0.060->-0.060 (over 3) logprob:train/valid[119,179,final]=(-0.080,-0.062,-0.062/-0.089,-0.083,-0.083)

set -e

@@ -40,15 +38,15 @@ get_egs_stage=-10
affix=1a

# training options
num_epochs=4
dropout_schedule='0,0@0.20,0.5@0.50,0'
num_epochs=10
num_jobs_initial=2
num_jobs_final=5
minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
num_jobs_final=8
minibatch_size=150=128,64/300=64,32/600=32,16/1200=8
common_egs_dir=
l2_regularize=0.00005
dim=450
frames_per_iter=3000000
cmvn_opts="--norm-means=true --norm-vars=true"
cmvn_opts="--norm-means=false --norm-vars=false"
train_set=train_si284_spe2e_hires
test_sets="test_dev93 test_eval92"

@@ -69,7 +67,7 @@ fi

lang=data/lang_e2e
treedir=exp/chain/e2e_tree # it's actually just a trivial tree (no tree building)
dir=exp/chain/e2e_tdnn_${affix}
dir=exp/chain/e2e_tdnnf_${affix}

if [ $stage -le 0 ]; then
# Create a version of the lang/ directory that has one state per phone in the
@@ -102,25 +100,35 @@ fi
if [ $stage -le 2 ]; then
echo "$0: creating neural net configs using the xconfig parser";
num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
opts="l2-regularize=0.01"
output_opts="l2-regularize=0.0025"
tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true"
tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
prefinal_opts="l2-regularize=0.01"
output_opts="l2-regularize=0.005"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=40 name=input
relu-batchnorm-layer name=tdnn1 input=Append(-1,0,1) dim=$dim
relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$dim $opts
relu-batchnorm-layer name=tdnn3 dim=$dim $opts
relu-batchnorm-layer name=tdnn4 input=Append(-1,0,1) dim=$dim $opts
relu-batchnorm-layer name=tdnn5 dim=$dim $opts
relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$dim $opts
relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$dim $opts
relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$dim $opts
relu-batchnorm-layer name=prefinal-chain dim=$dim target-rms=0.5 $opts
output-layer name=output include-log-softmax=true dim=$num_targets $output_opts
relu-batchnorm-dropout-layer name=tdnn1 input=Append(-1,0,1) $tdnn_opts dim=1024
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3
linear-component name=prefinal-l dim=192 $linear_opts
prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192
output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
@@ -139,14 +147,15 @@ if [ $stage -le 3 ]; then
--egs.dir "$common_egs_dir" \
--egs.stage $get_egs_stage \
--egs.opts "" \
--trainer.dropout-schedule $dropout_schedule \
--trainer.num-chunk-per-minibatch $minibatch_size \
--trainer.frames-per-iter $frames_per_iter \
--trainer.num-epochs $num_epochs \
--trainer.optimization.momentum 0 \
--trainer.optimization.num-jobs-initial $num_jobs_initial \
--trainer.optimization.num-jobs-final $num_jobs_final \
--trainer.optimization.initial-effective-lrate 0.001 \
--trainer.optimization.final-effective-lrate 0.0001 \
--trainer.optimization.initial-effective-lrate 0.0005 \
--trainer.optimization.final-effective-lrate 0.00005 \
--trainer.optimization.shrink-value 1.0 \
--trainer.max-param-change 2.0 \
--cleanup.remove-egs true \
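
A hedged sketch, not part of this commit: the header note in run_tdnn_flatstart.sh says the phone-based recipe can be switched to character mode by changing _nosp to _char everywhere. One illustrative, non-destructive way to do that (the derived file name is hypothetical, and the _char lang directory is assumed to have been prepared by the run_e2e_char.sh flow):

    # run from egs/wsj/s5; write a character-mode copy rather than editing in place
    sed 's/_nosp/_char/g' local/chain/e2e/run_tdnn_flatstart.sh \
      > local/chain/e2e/run_tdnn_char_flatstart.sh   # hypothetical file name
    chmod +x local/chain/e2e/run_tdnn_char_flatstart.sh
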
35 changes: 18 additions & 17 deletions egs/wsj/s5/local/chain/e2e/run_tdnn_lstm_flatstart.sh
@@ -6,31 +6,32 @@
# a full trivial biphone context-dependency tree. This is because this recipe is
# meant for character-based (i.e. lexicon-free) modeling where context helps
# significantly.
# It does not use ivectors or other forms of speaker adaptation
# except simple mean and variance normalization.
# It does not use ivectors or other forms of speaker adaptation.
# It is called from run_e2e_char.sh

# Note: this script is configured to run as character-based, if you want
# to run it in phoneme mode, you'll need to change _char
# to _nosp everywhere and also copy phone_lm.fst instead
# of char_lm.fst (in stage 1 below)
# to _nosp everywhere.


# local/chain/compare_wer.sh exp/chain/e2e_tdnn_lstm_bichar_1a
# System e2e_tdnn_lstm_bichar_1a
# WER dev93 (tgpr) 9.42
# WER dev93 (tg) 8.85
# WER dev93 (big-dict,tgpr) 7.70
# WER dev93 (big-dict,fg) 6.79
# WER eval92 (tgpr) 6.42
# WER eval92 (tg) 6.11
# WER eval92 (big-dict,tgpr) 4.50
# WER eval92 (big-dict,fg) 4.09
# Final train prob -0.7535
# Final valid prob -0.7786
#WER dev93 (tgpr) 9.85
#WER dev93 (tg) 9.32
#WER dev93 (big-dict,tgpr) 8.19
#WER dev93 (big-dict,fg) 7.27
#WER eval92 (tgpr) 6.89
#WER eval92 (tg) 6.70
#WER eval92 (big-dict,tgpr) 5.14
#WER eval92 (big-dict,fg) 4.29
# Final train prob -0.0610
# Final valid prob -0.0836
# Final train prob (xent)
# Final valid prob (xent)
# Num-params 9219188

# steps/info/chain_dir_info.pl exp/chain/e2e_tdnn_lstm_bichar_1a/
# exp/chain/e2e_tdnn_lstm_bichar_1a/: num-iters=138 nj=2..5 num-params=9.2M dim=40->3444 combine=-6.480->-6.478 logprob:train/valid[91,137,final]=(-0.766,-0.754,-0.754/-0.784,-0.779,-0.779)

# exp/chain/e2e_tdnn_lstm_bichar_1a_nocmvn: num-iters=138 nj=2..5 num-params=9.2M dim=40->3444 combine=-1.211->-1.211 (over 3) logprob:train/valid[91,137,final]=(-0.079,-0.062,-0.061/-0.093,-0.084,-0.084)

set -e

@@ -50,7 +51,7 @@ common_egs_dir=
l2_regularize=0.00001
dim=512
frames_per_iter=2500000
cmvn_opts="--norm-means=true --norm-vars=true"
cmvn_opts="--norm-means=false --norm-vars=false"
train_set=train_si284_spe2e_hires
test_sets="test_dev93 test_eval92"

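A hedged usage sketch, not part of this commit: once both flat-start systems have been trained and decoded, the comparison script quoted in the result tables above can be pointed at both experiment directories at once (multi-directory support is assumed here, as is usual for the WSJ local/chain/compare_wer.sh):

    # run from egs/wsj/s5 after both systems have finished training and decoding
    local/chain/compare_wer.sh exp/chain/e2e_tdnnf_1a exp/chain/e2e_tdnn_lstm_bichar_1a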