Nnet1 dropout ivec #1090

Merged: 3 commits, Oct 4, 2016
36 changes: 26 additions & 10 deletions egs/ami/s5/RESULTS_ihm
@@ -23,30 +23,46 @@ exp/ihm/tri4a_mmi_b0.1/decode_eval_3.mdl_ami_fsh.o3g.kn.pr1-7/ascore_12/eval_o4.

# Karel, JSALT 2015, (21.7.2015)

# dev,
## GMM,
# GMM - dev,
%WER 38.1 | 13098 94489 | 67.1 20.6 12.2 5.2 38.1 67.0 | exp/ihm/tri2a/decode_dev_ami_fsh.o3g.kn.pr1-7/ascore_13/dev.ctm.filt.sys
%WER 35.5 | 13098 94487 | 69.6 19.0 11.4 5.1 35.5 65.8 | exp/ihm/tri3a/decode_dev_ami_fsh.o3g.kn.pr1-7/ascore_13/dev.ctm.filt.sys
%WER 32.2 | 13098 94483 | 72.5 17.2 10.3 4.8 32.2 63.8 | exp/ihm/tri4a/decode_dev_ami_fsh.o3g.kn.pr1-7/ascore_14/dev.ctm.filt.sys #0.1% worse than Pawel!
%WER 30.2 | 13098 94479 | 74.0 15.6 10.4 4.2 30.2 61.9 | exp/ihm/tri4a_mmi_b0.1/decode_dev_3.mdl_ami_fsh.o3g.kn.pr1-7/ascore_12/dev.ctm.filt.sys
## DNN-Xent,
%WER 26.0 | 13098 94483 | 77.9 13.5 8.5 4.0 26.0 58.4 | exp/ihm/dnn4_pretrain-dbn_dnn/decode_dev_ami_fsh.o3g.kn.pr1-7/ascore_11/dev.ctm.filt.sys
## DNN-sMBR,
%WER 24.9 | 13098 94484 | 79.2 13.2 7.6 4.1 24.9 57.1 | exp/ihm/dnn4_pretrain-dbn_dnn_smbr/decode_dev_ami_fsh.o3g.kn.pr1-7_it1/ascore_11/dev.ctm.filt.sys
%WER 24.3 | 13098 94481 | 79.6 12.6 7.8 3.9 24.3 56.3 | exp/ihm/dnn4_pretrain-dbn_dnn_smbr/decode_dev_ami_fsh.o3g.kn.pr1-7_it4/ascore_12/dev.ctm.filt.sys

# eval,
## GMM,
# GMM - eval,
%WER 43.9 | 12643 89978 | 60.8 25.3 13.9 4.8 43.9 65.6 | exp/ihm/tri2a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_14/eval.ctm.filt.sys
%WER 40.8 | 12643 89985 | 63.8 23.6 12.6 4.7 40.8 64.6 | exp/ihm/tri3a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_14/eval.ctm.filt.sys
%WER 35.1 | 12643 89975 | 69.1 19.8 11.1 4.2 35.1 61.8 | exp/ihm/tri4a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_14/eval.ctm.filt.sys #0.1% worse than Pawel!
%WER 31.7 | 12643 89986 | 72.1 18.0 9.9 3.8 31.7 59.4 | exp/ihm/tri4a_mmi_b0.1/decode_eval_4.mdl_ami_fsh.o3g.kn.pr1-7/ascore_11/eval.ctm.filt.sys

# nnet1 DNN on 'fmllr' system, RBMs 6x2048 neurons (21.7.2015):
## DNN-Xent,
%WER 26.0 | 13098 94483 | 77.9 13.5 8.5 4.0 26.0 58.4 | exp/ihm/dnn4_pretrain-dbn_dnn/decode_dev_ami_fsh.o3g.kn.pr1-7/ascore_11/dev.ctm.filt.sys
%WER 27.1 | 12643 89971 | 76.4 15.5 8.1 3.5 27.1 57.2 | exp/ihm/dnn4_pretrain-dbn_dnn/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_10/eval.ctm.filt.sys
## DNN-sMBR,
%WER 25.4 | 12643 89974 | 77.9 14.7 7.4 3.3 25.4 55.1 | exp/ihm/dnn4_pretrain-dbn_dnn_smbr/decode_eval_ami_fsh.o3g.kn.pr1-7_it1/ascore_11/eval.ctm.filt.sys
%WER 24.3 | 13098 94481 | 79.6 12.6 7.8 3.9 24.3 56.3 | exp/ihm/dnn4_pretrain-dbn_dnn_smbr/decode_dev_ami_fsh.o3g.kn.pr1-7_it4/ascore_12/dev.ctm.filt.sys
%WER 24.6 | 12643 89972 | 78.8 14.1 7.1 3.3 24.6 54.4 | exp/ihm/dnn4_pretrain-dbn_dnn_smbr/decode_eval_ami_fsh.o3g.kn.pr1-7_it4/ascore_11/eval.ctm.filt.sys

# nnet1 'fmllr + relu + dropout' system (27.9.2016):
## DNN-Xent,
%WER 26.3 | 13098 94489 | 77.6 13.6 8.7 3.9 26.3 59.2 | 0.005 | exp/ihm/dnn4d-6L1024-relu/decode_dev/ascore_11/dev.ctm.filt.sys
%WER 27.2 | 12643 89970 | 75.9 15.0 9.1 3.1 27.2 57.4 | 0.053 | exp/ihm/dnn4d-6L1024-relu/decode_eval/ascore_11/eval.ctm.filt.sys
# => about the same as the larger RBM system,
## DNN-sMBR,
%WER 25.1 | 13098 94474 | 78.6 12.7 8.6 3.7 25.1 57.6 | -0.193 | exp/ihm/dnn4d-6L1024-relu_smbr/decode_dev_ami_fsh.o3g.kn.pr1-7_it4/ascore_15/dev.ctm.filt.sys
%WER 25.2 | 12643 89977 | 77.7 14.1 8.2 2.9 25.2 55.1 | -0.138 | exp/ihm/dnn4d-6L1024-relu_smbr/decode_eval_ami_fsh.o3g.kn.pr1-7_it4/ascore_15/eval.ctm.filt.sys
# => on sMBR worse than the larger RBM system (tuning learning rate did not help),

# nnet1 DNN on 'fmllr + kaldi i-vector per-spk' system (3.10.2016), RBMs 6x2048 neurons:
## DNN-Xent,
%WER 24.7 | 13098 94475 | 79.2 12.7 8.2 3.9 24.7 58.1 | -0.018 | exp/ihm/dnn4_pretrain-dbn-ivec_dnn/decode_dev_ami_fsh.o3g.kn.pr1-7/ascore_11/dev.ctm.filt.sys
%WER 25.2 | 12643 89972 | 78.0 13.8 8.2 3.2 25.2 56.4 | 0.057 | exp/ihm/dnn4_pretrain-dbn-ivec_dnn/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_11/eval.ctm.filt.sys
## DNN-sMBR,
%WER 23.2 | 13098 94477 | 80.3 11.5 8.2 3.6 23.2 56.3 | 0.010 | exp/ihm/dnn4_pretrain-dbn-ivec_dnn_smbr/decode_dev_ami_fsh.o3g.kn.pr1-7_it4/ascore_13/dev.ctm.filt.sys
%WER 22.8 | 12643 89982 | 80.2 12.8 7.1 3.0 22.8 53.8 | 0.060 | exp/ihm/dnn4_pretrain-dbn-ivec_dnn_smbr/decode_eval_ami_fsh.o3g.kn.pr1-7_it4/ascore_12/eval.ctm.filt.sys
# => The kaldi i-vectors on fMLLR feats are helping nicely,
# (WER not too far from the chain systems, even w/o data augmentation, but with fMLLR),


# Vijay, TDNN results,
for x in exp/$mic/nnet2_online/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep Sum $x/ascore_*/*.sys | utils/best_wer.sh; done
69 changes: 69 additions & 0 deletions egs/ami/s5/local/nnet/prepare_ivectors.sh
@@ -0,0 +1,69 @@
#!/bin/bash

# Copyright 2016, Brno University of Technology (Author: Karel Vesely)
# Apache 2.0

. path.sh
. cmd.sh

train=data_ihm-fmllr-tri4/ihm/train
dev=data_ihm-fmllr-tri4/ihm/dev
eval=data_ihm-fmllr-tri4/ihm/eval
gmm=exp/ihm/tri4a

# Output directory for models and i-vectors,
ivec_absdir=$(readlink -m data_ihm-fmllr-tri4/ihm/ivector)

. utils/parse_options.sh

set -euxo pipefail

# UBM training (1024 components),
ubm=$ivec_absdir/ubm
steps/nnet/ivector/train_diag_ubm.sh --cmd "$train_cmd" --nj 10 \
$train 1024 $ubm

# Training i-vector extractor (100 dims),
iextractor=$ivec_absdir/iextractor
steps/nnet/ivector/train_ivector_extractor.sh --cmd "$train_cmd --mem 5G" --nj 10 \
--ivector-dim 100 $train $ubm $iextractor

# Extract the i-vectors (per speaker, also stored as per-utterance copies),
steps/nnet/ivector/extract_ivectors.sh --cmd "$train_cmd" --nj 80 \
$train data/lang $iextractor \
${gmm}_ali $ivec_absdir/ivec_train
steps/nnet/ivector/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \
$dev data/lang $iextractor \
$gmm/decode_dev_ami_fsh.o3g.kn.pr1-7 $ivec_absdir/ivec_dev
steps/nnet/ivector/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \
$eval data/lang $iextractor \
$gmm/decode_eval_ami_fsh.o3g.kn.pr1-7 $ivec_absdir/ivec_eval


# POST-PROCESS PER-SPEAKER I-VECTORS:

# Get the global mean of the i-vectors (train),
ivector-mean scp:$ivec_absdir/ivec_train/ivectors_spk.scp $iextractor/global_mean

# Merge the sets, normalize means, apply length normalization,
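# (presumably so that train/dev/eval i-vectors share one centre, taken from the
#  training data, and have a bounded norm before being appended to the DNN inputs)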
cat $ivec_absdir/ivec_train/ivectors_spk-as-utt.scp \
$ivec_absdir/ivec_dev/ivectors_spk-as-utt.scp \
$ivec_absdir/ivec_eval/ivectors_spk-as-utt.scp | \
ivector-subtract-global-mean $iextractor/global_mean scp:- ark:- | \
ivector-normalize-length --scaleup=false ark:- ark,scp:$ivec_absdir/ivectors_spk-as-utt_normalized.ark,$ivec_absdir/ivectors_spk-as-utt_normalized.scp


# POST-PROCESS PER-SENTENCE I-VECTORS:

# Get the global mean of the i-vectors (train, per-sentence),
ivector-mean scp:$ivec_absdir/ivec_train/ivectors_utt.scp $iextractor/global_mean_utt

# Merge the sets, normalize means, apply length normalization,
cat $ivec_absdir/ivec_train/ivectors_utt.scp \
$ivec_absdir/ivec_dev/ivectors_utt.scp \
$ivec_absdir/ivec_eval/ivectors_utt.scp | \
ivector-subtract-global-mean $iextractor/global_mean_utt scp:- ark:- | \
ivector-normalize-length --scaleup=false ark:- ark,scp:$ivec_absdir/ivectors_utt_normalized.ark,$ivec_absdir/ivectors_utt_normalized.scp


exit 0 # Done!
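
A note on the "spk-as-utt" lists used above: extract_ivectors.sh keeps one i-vector per
speaker and also lists that same vector under each of the speaker's utterances, so the
nnet1 scripts can look it up by utterance-id. A minimal sketch of that duplication,
assuming the usual utt2spk file and utils/apply_map.pl from Kaldi's utils/ directory
(an illustration of the idea only, not the code extract_ivectors.sh actually runs):

ivec=$(readlink -m data_ihm-fmllr-tri4/ihm/ivector)
# replace the speaker-id in field 2 of utt2spk with that speaker's scp entry,
# giving "utt-id <ark-offset-of-the-speaker-ivector>" lines:
utils/apply_map.pl -f 2 $ivec/ivec_train/ivectors_spk.scp \
  <data_ihm-fmllr-tri4/ihm/train/utt2spk \
  >$ivec/ivec_train/ivectors_spk-as-utt_demo.scp   # hypothetical demo output name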
121 changes: 121 additions & 0 deletions egs/ami/s5/local/nnet/run_dnn_ivec.sh
@@ -0,0 +1,121 @@
#!/bin/bash -u

. ./cmd.sh
. ./path.sh

# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Shinji Watanabe, Karel Vesely,

# Config:
nj=80
nj_decode=30
stage=0 # resume training with --stage=N
. utils/parse_options.sh || exit 1;
#

if [ $# -ne 1 ]; then
printf "\nUSAGE: %s [opts] <mic condition(ihm|sdm|mdm)>\n\n" `basename $0`
exit 1;
fi
mic=$1

gmmdir=exp/$mic/tri4a
data_fmllr=data_${mic}-fmllr-tri4

final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=$gmmdir/graph_${LM}

set -euxo pipefail

# Store fMLLR features, so we can train on them easily,
if [ $stage -le 0 ]; then
# eval
dir=$data_fmllr/$mic/eval
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_eval_${LM} \
$dir data/$mic/eval $gmmdir $dir/log $dir/data
# dev
dir=$data_fmllr/$mic/dev
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_dev_${LM} \
$dir data/$mic/dev $gmmdir $dir/log $dir/data
# train
dir=$data_fmllr/$mic/train
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir ${gmmdir}_ali \
$dir data/$mic/train $gmmdir $dir/log $dir/data
# split the data : 90% train 10% cross-validation (held-out)
utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10
fi

# Prepare the i-vectors,
if [ $stage -le 1 ]; then
local/nnet/prepare_ivectors.sh
fi

# Pre-train DBN, i.e. a stack of RBMs,
ivector=scp:$data_fmllr/ihm/ivector/ivectors_spk-as-utt_normalized.scp
if [ $stage -le 2 ]; then
dir=exp/$mic/dnn4_pretrain-dbn-ivec
$cuda_cmd $dir/log/pretrain_dbn.log \
steps/nnet/pretrain_dbn.sh --rbm-iter 1 --ivector $ivector \
$data_fmllr/$mic/train $dir
fi

# Train the DNN optimizing per-frame cross-entropy,
if [ $stage -le 3 ]; then
dir=exp/$mic/dnn4_pretrain-dbn-ivec_dnn
ali=${gmmdir}_ali
feature_transform=exp/$mic/dnn4_pretrain-dbn-ivec/final.feature_transform
dbn=exp/$mic/dnn4_pretrain-dbn-ivec/6.dbn
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --feature-transform $feature_transform --ivector $ivector \
--dbn $dbn --hid-layers 0 --learn-rate 0.008 \
$data_fmllr/$mic/train_tr90 $data_fmllr/$mic/train_cv10 data/lang $ali $ali $dir
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 --ivector $ivector \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 --ivector $ivector \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}
fi

# Sequence training using the sMBR criterion; we do stochastic GD with
# per-utterance updates. We use acwt 0.1, which is usually a good value.
# Lattices are not regenerated (this is faster).

dir=exp/$mic/dnn4_pretrain-dbn-ivec_dnn_smbr
srcdir=exp/$mic/dnn4_pretrain-dbn-ivec_dnn
acwt=0.1

# Generate lattices and alignments,
if [ $stage -le 4 ]; then
steps/nnet/align.sh --nj $nj --cmd "$train_cmd" --ivector $ivector \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali
steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --ivector $ivector \
--config conf/decode_dnn.conf --acwt $acwt \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_denlats
fi

# Re-train the DNN by 4 epochs of sMBR,
if [ $stage -le 5 ]; then
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
--ivector $ivector \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir
# Decode (reuse HCLG graph)
for ITER in 4 1; do
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--nnet $dir/${ITER}.nnet --acwt $acwt --ivector $ivector \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}_it${ITER}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--nnet $dir/${ITER}.nnet --acwt $acwt --ivector $ivector \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}_it${ITER}
done
fi

# Getting results [see RESULTS file]
# for x in exp/$mic/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
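
A hedged usage sketch for this script (run from egs/ami/s5, assuming the tri4a GMM
system with its alignments and decoding directories already exists, as in the standard
AMI s5 run):

local/nnet/run_dnn_ivec.sh ihm

Note that prepare_ivectors.sh and the $ivector path above hard-code the 'ihm'
directories, so the sdm/mdm conditions would need those paths adjusted first.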

119 changes: 119 additions & 0 deletions egs/ami/s5/local/nnet/run_relu.sh
@@ -0,0 +1,119 @@
#!/bin/bash -u

. ./cmd.sh
. ./path.sh

# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Shinji Watanabe, Karel Vesely,

# Config:
nj=80
nj_decode=30
stage=0 # resume training with --stage=N
. utils/parse_options.sh || exit 1;
#

if [ $# -ne 1 ]; then
printf "\nUSAGE: %s [opts] <mic condition(ihm|sdm|mdm)>\n\n" `basename $0`
exit 1;
fi
mic=$1

gmmdir=exp/$mic/tri4a
data_fmllr=data_${mic}-fmllr-tri4

final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=$gmmdir/graph_${LM}

set -euxo pipefail

# Store fMLLR features, so we can train on them easily,
if [ $stage -le 0 -a ! -e $data_fmllr/$mic/eval ]; then
# eval
dir=$data_fmllr/$mic/eval
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_eval_${LM} \
$dir data/$mic/eval $gmmdir $dir/log $dir/data
# dev
dir=$data_fmllr/$mic/dev
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_dev_${LM} \
$dir data/$mic/dev $gmmdir $dir/log $dir/data
# train
dir=$data_fmllr/$mic/train
steps/nnet/make_fmllr_feats.sh --nj 15 --cmd "$train_cmd" \
--transform-dir ${gmmdir}_ali \
$dir data/$mic/train $gmmdir $dir/log $dir/data
# split the data : 90% train 10% cross-validation (held-out)
utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10
fi

train=data_ihm-fmllr-tri4/ihm/train
dev=data_ihm-fmllr-tri4/ihm/dev
eval=data_ihm-fmllr-tri4/ihm/eval

lrate=0.00025
param_std=0.02
lr_alpha=1.0
lr_beta=0.75
dropout_schedule=0.2,0.2,0.2,0.2,0.2,0.0
gmm=$gmmdir
graph=$graph_dir

# Train 6 layer DNN from random initialization,
# - Parametric RELU, alphas+betas trained,
# - Dropout retention 0.8 in 5 initial epochs with fixed learning rate,
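# (Reading of the options above, as an assumption based on this comment rather than
#  on steps/nnet/train.sh itself: the dropout schedule gives one dropout rate per
#  epoch, i.e. retention 0.8 for the first five epochs and dropout switched off
#  afterwards; '--keep-lr-iters 5' holds the learning rate fixed over those epochs;
#  lr_alpha/lr_beta are learning-rate coefficients of the trainable ParametricRelu
#  alphas and betas.)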
if [ $stage -le 1 ]; then
# Train the DNN optimizing per-frame cross-entropy.
dir=exp/$mic/dnn4d-6L1024-relu
ali=${gmm}_ali
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --learn-rate $lrate \
--splice 5 --hid-layers 6 --hid-dim 1024 \
--proto-opts "--activation-type=<ParametricRelu> --activation-opts=<AlphaLearnRateCoef>_${lr_alpha}_<BetaLearnRateCoef>_${lr_beta} --param-stddev-factor $param_std --hid-bias-mean 0 --hid-bias-range 0 --with-dropout --no-glorot-scaled-stddev --no-smaller-input-weights" \
--scheduler-opts "--keep-lr-iters 5 --dropout-schedule $dropout_schedule" \
${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --acwt 0.1 \
$graph $dev $dir/decode_$(basename $dev)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --acwt 0.1 \
$graph $eval $dir/decode_$(basename $eval)
fi

# Sequence training using the sMBR criterion; we do stochastic GD with
# per-utterance updates. We use acwt 0.1, which is usually a good value.
# Lattices are not regenerated (this is faster).

dir=exp/$mic/dnn4d-6L1024-relu_smbr
srcdir=exp/$mic/dnn4d-6L1024-relu
acwt=0.1

# Generate lattices and alignments,
if [ $stage -le 3 ]; then
steps/nnet/align.sh --nj $nj --cmd "$train_cmd" \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali
steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--acwt $acwt $data_fmllr/$mic/train data/lang $srcdir ${srcdir}_denlats
fi

# Re-train the DNN by 4 epochs of sMBR,
if [ $stage -le 4 ]; then
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
--learn-rate 0.0000003 \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir
# Decode (reuse HCLG graph)
for ITER in 4 1; do
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${LM}_it${ITER}
steps/nnet/decode.sh --nj $nj_decode --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${LM}_it${ITER}
done
fi

# Getting results [see RESULTS file]
# for x in exp/$mic/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
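
A usage sketch for this script, under the same assumptions as run_dnn_ivec.sh above
(it reuses the stored fMLLR features when stage 0 finds them already present, and its
$train/$dev/$eval variables likewise hard-code the ihm directories):

local/nnet/run_relu.sh ihm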
