Merge branch 'master' of https://github.com/kaldi-asr/kaldi
* 'master' of https://github.com/kaldi-asr/kaldi:
  [egs] Add chain recipe for Fisher English (kaldi-asr#1803)
  [src] Cosmetic fix to usage message (kaldi-asr#1800)
  [egs] Fix bug RE xent_regularize in Aspire chain recipes. (kaldi-asr#1797)
kronos-cm committed Aug 9, 2017
2 parents bc0ec8a + 53e5e12 commit 3879152
Showing 6 changed files with 475 additions and 3 deletions.
3 changes: 2 additions & 1 deletion egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
@@ -20,6 +20,7 @@ num_data_reps=3


min_seg_len=
xent_regularize=0.1
chunk_width=150
chunk_left_context=40
chunk_right_context=40
@@ -197,7 +198,7 @@ if [ $stage -le 12 ]; then
--cmd "$decode_cmd" \
--feat.online-ivector-dir exp/nnet3/ivectors_train_min${min_seg_len} \
--feat.cmvn-opts "--norm-means=false --norm-vars=false" \
--chain.xent-regularize 0.1 \
--chain.xent-regularize $xent_regularize \
--chain.leaky-hmm-coefficient 0.1 \
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
3 changes: 2 additions & 1 deletion egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
@@ -20,6 +20,7 @@ num_data_reps=3


min_seg_len=
xent_regularize=0.1
frames_per_eg=150
# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -191,7 +192,7 @@ if [ $stage -le 12 ]; then
--cmd "$decode_cmd" \
--feat.online-ivector-dir exp/nnet3/ivectors_train_min${min_seg_len} \
--feat.cmvn-opts "--norm-means=false --norm-vars=false" \
--chain.xent-regularize 0.1 \
--chain.xent-regularize $xent_regularize \
--chain.leaky-hmm-coefficient 0.1 \
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
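
The two Aspire edits above replace the hard-coded value in "--chain.xent-regularize 0.1" with the xent_regularize variable now declared near the top of each script, so the cross-entropy regularization weight can be overridden from the command line instead of being fixed at 0.1. A minimal sketch of the intended use (the value 0.05 is hypothetical, and this assumes the tuning scripts source utils/parse_options.sh as the surrounding recipes do):

  # hypothetical invocation overriding the default of 0.1
  local/chain/tuning/run_tdnn_7b.sh --xent-regularize 0.05

utils/parse_options.sh maps the --xent-regularize option onto the xent_regularize shell variable, so the overridden value is what reaches the --chain.xent-regularize argument of train.py.
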
106 changes: 106 additions & 0 deletions egs/fisher_english/s5/local/chain/compare_wer_general.sh
@@ -0,0 +1,106 @@
#!/bin/bash

# This script compares decoding results (WER and final objective values) between systems.
# e.g. local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_{c,d}_sp
# For discriminatively trained systems, specify the epoch after a colon, for instance:
# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_c_sp exp/chain_cleaned/tdnn_c_sp_smbr:{1,2,3}


echo "# $0 $*"

include_looped=false
if [ "$1" == "--looped" ]; then
include_looped=true
shift
fi

used_epochs=false

# The function set_names separates the epoch-related part of the name
# [for discriminative training] from the regular part of the name.
# If called with a colon-free directory name, like:
#   set_names exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr
# it will set dirname=exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr and epoch_infix="";
# if called with something like:
#   set_names exp/chain_cleaned/tdnn_d_sp_smbr:3
# it will set dirname=exp/chain_cleaned/tdnn_d_sp_smbr and epoch_infix="_epoch3".


set_names() {
if [ $# != 1 ]; then
echo "compare_wer_general.sh: internal error"
exit 1 # exit the program
fi
dirname=$(echo $1 | cut -d: -f1)
epoch=$(echo $1 | cut -s -d: -f2)
if [ -z $epoch ]; then
epoch_infix=""
else
used_epochs=true
epoch_infix=_epoch${epoch}
fi
}
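# Worked example of the parsing above: for "exp/chain_cleaned/tdnn_d_sp_smbr:3",
# "cut -d: -f1" gives dirname=exp/chain_cleaned/tdnn_d_sp_smbr and
# "cut -s -d: -f2" gives epoch=3, so epoch_infix=_epoch3.  The -s flag makes
# cut print nothing when the argument contains no colon, which is how
# epoch_infix ends up empty for plain directory names.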



echo -n "# System "
for x in $*; do printf "% 10s" " $(basename $x)"; done
echo

strings=("# WER on dev " "# WER on test ")

for n in 0 1; do
echo -n "${strings[$n]}"
for x in $*; do
set_names $x # sets $dirname and $epoch_infix
decode_names=(dev${epoch_infix} test${epoch_infix})
wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer* | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
done
echo
if $include_looped; then
echo -n "# [looped:] "
for x in $*; do
set_names $x # sets $dirname and $epoch_infix
decode_names=(dev${epoch_infix} test${epoch_infix})
wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer* | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
done
echo
fi
done


if $used_epochs; then
exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi

echo -n "# Final train prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final train prob (xent)"
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob (xent)"
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done

echo
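
For the Fisher English systems added by this commit, a typical invocation of this comparison script (matching the results quoted in the header of local/chain/run_tdnn.sh below) would be:

  local/chain/compare_wer_general.sh exp/chain_350k/tdnn7b_sp

which prints one column per system: WER on dev and test, followed by the final train/valid probabilities for the chain and xent outputs.
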
217 changes: 217 additions & 0 deletions egs/fisher_english/s5/local/chain/run_tdnn.sh
@@ -0,0 +1,217 @@
#!/bin/bash
set -e

# Based on run_tdnn_7b.sh in the fisher_swbd recipe.

# Results on a 350-hour random subset of Fisher English:
# local/chain/compare_wer_general.sh exp/chain_350k/tdnn7b_sp
# System tdnn7b_sp
# WER on dev 17.74
# WER on test 17.57
# Final train prob -0.1128
# Final valid prob -0.1251
# Final train prob (xent) -1.7908
# Final valid prob (xent) -1.7712

# steps/info/nnet3_dir_info.pl exp/chain_350k/tdnn7b_sp
# exp/chain_350k/tdnn7b_sp: num-iters=319 nj=3..16 num-params=22.1M dim=40+100->8617 combine=-0.14->-0.13



# configs for 'chain'
stage=0
tdnn_affix=7b
train_stage=-10
get_egs_stage=-10
decode_iter=
train_set=train
tree_affix=
nnet3_affix=
xent_regularize=0.1
hidden_dim=725
num_leaves=11000

# training options
num_epochs=4
remove_egs=false
common_egs_dir=
minibatch_size=128
num_jobs_initial=3
num_jobs_final=16
initial_effective_lrate=0.001
final_effective_lrate=0.0001
frames_per_iter=1500000

gmm=tri5a
build_tree_ali_dir=exp/tri4a_ali # used to make a new tree for chain topology, should match train data
# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi


gmm_dir=exp/$gmm # used to get training lattices (for chain supervision)
treedir=exp/chain${nnet3_affix}/tree_${tree_affix}
lat_dir=exp/chain${nnet3_affix}/tri5a_${train_set}_sp_lats # training lattices directory
dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}_sp
train_data_dir=data/${train_set}_sp_hires
lores_train_data_dir=data/${train_set}_sp
build_tree_train_data_dir=data/${train_set}
train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires
lang=data/lang_chain


# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet3 setup, and you can skip them by setting "--stage 8" if you have already
# run those things.
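# For example (illustrative), if the speed-perturbed features and iVectors
# already exist from an earlier run, something like
#   local/chain/run_tdnn.sh --stage 8
# skips the feature/iVector preparation, as noted above.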
local/nnet3/run_ivector_common.sh --stage $stage \
--speed-perturb true \
--train-set $train_set \
--nnet3-affix $nnet3_affix \
--generate-alignments false || exit 1;

if [ $stage -le 9 ]; then
# Get the alignments as lattices (gives the chain training more freedom).
# use the same num-jobs as the alignments
nj=$(cat $build_tree_ali_dir/num_jobs) || exit 1;
steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" $lores_train_data_dir \
data/lang $gmm_dir $lat_dir || exit 1;
rm $lat_dir/fsts.*.gz # save space
fi

if [ $stage -le 10 ]; then
# Create a version of the lang/ directory that has one state per phone in the
# topo file.  [Note: it really has two states; the first occurs exactly once,
# and the second can repeat zero or more times.]
rm -rf $lang
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that we may have to tune this
# topology later on.
steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 11 ]; then
# Build a tree using our new topology.
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
--leftmost-questions-truncate -1 \
--cmd "$train_cmd" $num_leaves $build_tree_train_data_dir $lang $build_tree_ali_dir $treedir || exit 1;
fi

if [ $stage -le 12 ]; then
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}')
learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
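# With the default xent_regularize=0.1 this works out to 0.5 / 0.1 = 5.0,
# i.e. the xent output layer gets a 5x learning-rate factor, which keeps its
# effective learning rate independent of the regularization constant (see the
# comment in the xconfig below).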

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=100 name=ivector
input dim=40 name=input
# note that it is important to have an input layer with name=input,
# as the layer immediately preceding the fixed-affine-layer, to enable
# the use of the short notation for the descriptor
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
# the first splicing is moved before the lda layer, so no splicing here
relu-batchnorm-layer name=tdnn1 dim=$hidden_dim
relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim
relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim
relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim
relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim
relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim
## adding the layers for chain branch
relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
# adding the layers for xent branch
# This block adds the configs for a separate output that will be
# trained with a cross-entropy objective in the 'chain' models; this
# has the effect of regularizing the hidden parts of the model.  We use
# 0.5 / xent_regularize as the learning-rate factor: this factor means
# the xent final layer learns at a rate independent of the regularization
# constant, while the 0.5 was tuned so as to make the relative progress
# similar in the xent and regular final layers.
relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi

if [ $stage -le 13 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
fi

mkdir -p $dir/egs
touch $dir/egs/.nodelete # keep egs around when the run dies.

steps/nnet3/chain/train.py --stage $train_stage \
--egs.dir "$common_egs_dir" \
--cmd "$decode_cmd" \
--feat.online-ivector-dir $train_ivector_dir \
--feat.cmvn-opts "--norm-means=false --norm-vars=false" \
--chain.xent-regularize $xent_regularize \
--chain.leaky-hmm-coefficient 0.1 \
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
--chain.lm-opts="--num-extra-lm-states=2000" \
--egs.stage $get_egs_stage \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width 150 \
--trainer.num-chunk-per-minibatch $minibatch_size \
--trainer.frames-per-iter $frames_per_iter \
--trainer.num-epochs $num_epochs \
--trainer.optimization.num-jobs-initial $num_jobs_initial \
--trainer.optimization.num-jobs-final $num_jobs_final \
--trainer.optimization.initial-effective-lrate $initial_effective_lrate \
--trainer.optimization.final-effective-lrate $final_effective_lrate \
--trainer.max-param-change 2.0 \
--cleanup.remove-egs $remove_egs \
--feat-dir $train_data_dir \
--tree-dir $treedir \
--lat-dir $lat_dir \
--dir $dir || exit 1;
fi

graph_dir=$dir/graph
if [ $stage -le 14 ]; then
# Note: it might appear that this $lang directory is mismatched, and it is as
# far as the 'topo' is concerned, but this script doesn't read the 'topo' from
# the lang directory.
utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir
fi

decode_suff=
if [ $stage -le 15 ]; then
iter_opts=
if [ ! -z $decode_iter ]; then
iter_opts=" --iter $decode_iter "
fi
for decode_set in dev test; do
(
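# The next line sets the number of decode jobs to the number of distinct
# speakers in the decode set (column 2 of utt2spk is the speaker id).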
num_jobs=$(cat data/${decode_set}_hires/utt2spk | cut -d' ' -f2 | sort -u | wc -l)
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--nj $num_jobs --cmd "$decode_cmd" $iter_opts \
--online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \
$graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1;
) &
done
fi
wait;
exit 0;