Merge branch 'master' of https://github.com/kaldi-asr/kaldi

* 'master' of https://github.com/kaldi-asr/kaldi: [src,scripts] nnet1-related changes: (kaldi-asr#1998) [egs] rotation for image augmentation in CIFAR example (kaldi-asr#1955) [egs] Minor fixes to the SRE16 v2 recipe (kaldi-asr#1986) [egs] Remove deprecated non-working scripts [scripts] Fixes to segment_long_utterances.sh (thanks @christophfeinauer) and train_raw_dnn.py (kaldi-asr#1993) [src] Minor fix: change to error message (kaldi-asr#1980) [egs] Add example of component-level l2-regularize for WSJ scripts [egs] Small fix to Chime4 RE data location (kaldi-asr#1966) [build] Remove download of ATLAS header files from tools/. (kaldi-asr#1974)
kronos-cm · Nov 4, 2017 · af99921 · af99921
2 parents a56c11c + c0c213f
commit af99921
Show file tree

Hide file tree

Showing 33 changed files with 1,602 additions and 474 deletions.
diff --git a/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
@@ -31,7 +31,7 @@ echo "$0 $@"  # Print the command line for logging
 audio_dir=$1/data/audio/16kHz/isolated
 trans_dir=$1/data/transcriptions
 
-echo "extract 5th channel (CH5.wav, the center bottom edge in the front of the tablet) for noisy data"
+echo "extract all channels (CH[1-6].wav) for noisy data"
 
 dir=`pwd`/data/local/data
 lmdir=`pwd`/data/local/nist_lm

diff --git a/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
@@ -31,7 +31,7 @@ echo "$0 $@"  # Print the command line for logging
 audio_dir=$1/data/audio/16kHz/isolated
 trans_dir=$1/data/transcriptions
 
-echo "extract 5th channel (CH5.wav, the center bottom edge in the front of the tablet) for noisy data"
+echo "extract all channels (CH[1-6].wav) for noisy data"
 
 dir=`pwd`/data/local/data
 lmdir=`pwd`/data/local/nist_lm

diff --git a/egs/chime4/s5_1ch/run.sh b/egs/chime4/s5_1ch/run.sh
@@ -40,8 +40,8 @@ chime4_data=`pwd`/../..
 # Otherwise, please specify it, e.g.,
 chime4_data=/db/laputa1/data/processed/public/CHiME4
 
-case $(hostname) in *.clsp.jhu.edu)
-  chime4_data=/export/corpora4/CHiME4/CHiME3 ;; # JHU,
+case $(hostname -f) in
+  *.clsp.jhu.edu) chime4_data=/export/corpora4/CHiME4/CHiME3 ;; # JHU,
 esac 
 
 if [ ! -d $chime4_data ]; then

diff --git a/egs/chime4/s5_2ch/run.sh b/egs/chime4/s5_2ch/run.sh
@@ -41,8 +41,8 @@ chime4_data=`pwd`/../..
 # Otherwise, please specify it, e.g.,
 chime4_data=/db/laputa1/data/processed/public/CHiME4
 
-case $(hostname) in *.clsp.jhu.edu)
-  chime4_data=/export/corpora4/CHiME4/CHiME3 ;; # JHU,
+case $(hostname -f) in
+  *.clsp.jhu.edu) chime4_data=/export/corpora4/CHiME4/CHiME3 ;; # JHU,
 esac 
 
 if [ ! -d $chime4_data ]; then

diff --git a/egs/chime4/s5_6ch/run.sh b/egs/chime4/s5_6ch/run.sh
@@ -40,8 +40,8 @@ chime4_data=`pwd`/../..
 # Otherwise, please specify it, e.g.,
 chime4_data=/db/laputa1/data/processed/public/CHiME4
 
-case $(hostname) in *.clsp.jhu.edu)
-  chime4_data=/export/corpora4/CHiME4/CHiME3 ;; # JHU,
+case $(hostname -f) in
+  *.clsp.jhu.edu) chime4_data=/export/corpora4/CHiME4/CHiME3 ;; # JHU,
 esac 
 
 if [ ! -d $chime4_data ]; then

diff --git a/egs/cifar/v1/local/nnet3/run_resnet_1d.sh b/egs/cifar/v1/local/nnet3/run_resnet_1d.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# 1d is as 1c but adding rotation in image augmentation.
+
+# local/nnet3/compare.sh exp/resnet1c_cifar10 exp/resnet1d_cifar10
+# System                 resnet1c_cifar10   resnet1d_cifar10
+# final test accuracy:        0.9514           0.9537      
+# final train accuracy:            1           0.9966
+# final test objf:         -0.157244          -0.139607
+# final train objf:      -0.00751868         -0.0219607
+# num-parameters:           1322730            1322730
+
+# local/nnet3/compare.sh exp/resnet1c_cifar100 exp/resnet1d_cifar100
+# System                 resnet1c_cifar100 resnet1d_cifar100
+# final test accuracy:       0.7627        0.7687
+# final train accuracy:       0.96         0.9276
+# final test objf:         -0.862205    -0.812203
+# final train objf:        -0.174973    -0.265734
+# num-parameters:           1345860       1345860
+# steps/info/nnet3_dir_info.pl exp/resnet1d_cifar10{,0}
+# exp/resnet1d_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.04->-0.03 loglike:train/valid[87,132,final]=(-0.153,-0.044,-0.022/-0.25,-0.173,-0.140) accuracy:train/valid[87,132,final]=(0.946,0.9880,0.9966/0.921,0.946,0.954)
+# exp/resnet1d_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.33->-0.29 loglike:train/valid[87,132,final]=(-0.81,-0.37,-0.27/-1.15,-0.95,-0.81) accuracy:train/valid[87,132,final]=(0.760,0.897,0.928/0.68,0.737,0.769)
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+
+
+# training options
+stage=0
+train_stage=-10
+dataset=cifar10
+srand=0
+reporting_email=
+affix=1d
+
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+
+
+dir=exp/resnet${affix}_${dataset}
+
+egs=exp/${dataset}_egs2
+
+if [ ! -d $egs ]; then
+  echo "$0: expected directory $egs to exist.  Run the get_egs.sh commands in the"
+  echo "    run.sh before this script."
+  exit 1
+fi
+
+# check that the expected files are in the egs directory.
+
+for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
+         $egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
+         $egs/info/output_dim; do
+  if [ ! -e $f ]; then
+    echo "$0: expected file $f to exist."
+    exit 1;
+  fi
+done
+
+
+mkdir -p $dir/log
+
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(cat $egs/info/output_dim)
+
+  # Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
+  # images.
+
+
+  nf1=48
+  nf2=96
+  nf3=256
+  nb3=128
+
+  a="num-minibatches-history=40.0"
+  common="$a required-time-offsets=0 height-offsets=-1,0,1"
+  res_opts="$a bypass-source=batchnorm"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=96 name=input
+  conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
+  res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
+  res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
+  conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
+  res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
+  res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
+  conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
+  res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
+  res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
+  res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
+  channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
+  output-layer name=output learning-rate-factor=0.1 dim=$num_targets
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 2 ]; then
+
+  steps/nnet3/train_raw_dnn.py --stage=$train_stage \
+    --cmd="$train_cmd" \
+    --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --rotation-degree=30 --rotation-prob=0.5 --num-channels=3" \
+    --trainer.srand=$srand \
+    --trainer.max-param-change=2.0 \
+    --trainer.num-epochs=100 \
+    --egs.frames-per-eg=1 \
+    --trainer.optimization.num-jobs-initial=1 \
+    --trainer.optimization.num-jobs-final=2 \
+    --trainer.optimization.initial-effective-lrate=0.003 \
+    --trainer.optimization.final-effective-lrate=0.0003 \
+    --trainer.optimization.minibatch-size=256,128,64 \
+    --trainer.optimization.proportional-shrink=50.0 \
+    --trainer.shuffle-buffer-size=2000 \
+    --egs.dir="$egs" \
+    --use-gpu=true \
+    --reporting.email="$reporting_email" \
+    --dir=$dir  || exit 1;
+fi
+
+
+exit 0;
diff --git a/egs/cifar/v1/local/nnet3/run_resnet_1e.sh b/egs/cifar/v1/local/nnet3/run_resnet_1e.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# 1e is as 1d but with more filters and epochs.
+
+# local/nnet3/compare.sh exp/resnet1d_cifar10/ exp/resnet1e_cifar10/
+# System                resnet1d_cifar10 resnet1e_cifar10
+# final test accuracy:       0.9537      0.9583
+# final train accuracy:       0.9966      0.9994
+# final test objf:         -0.139607   -0.124945
+# final train objf:       -0.0219607 -0.00603407
+# num-parameters:           1322730     3465194
+
+# local/nnet3/compare.sh exp/resnet1d_cifar100 exp/resnet1e_cifar100
+# System                resnet1d_cifar100 resnet1e_cifar100
+# final test accuracy:       0.7687      0.7914
+# final train accuracy:       0.9276     0.9922
+# final test objf:         -0.812203   -0.786857
+# final train objf:        -0.265734   -0.0514912
+# num-parameters:           1345860     3511364
+# steps/info/nnet3_dir_info.pl exp/resnet1e_cifar10{,0}
+# exp/resnet1e_cifar10: num-iters=186 nj=1..2 num-params=3.5M dim=96->10 combine=-0.01->-0.01 loglike:train/valid[123,185,final]=(-0.109,-0.026,-0.0060/-0.21,-0.167,-0.125) accuracy:train/valid[123,185,final]=(0.963,0.9936,0.9994/0.930,0.949,0.958)
+# exp/resnet1e_cifar100/: num-iters=186 nj=1..2 num-params=3.5M dim=96->100 combine=-0.09->-0.07 loglike:train/valid[123,185,final]=(-0.53,-0.109,-0.051/-1.06,-0.93,-0.79) accuracy:train/valid[123,185,final]=(0.844,0.9730,0.9922/0.713,0.760,0.791)
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+
+
+# training options
+stage=0
+train_stage=-10
+dataset=cifar10
+srand=0
+reporting_email=
+affix=1e
+
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+
+
+dir=exp/resnet${affix}_${dataset}
+
+egs=exp/${dataset}_egs2
+
+if [ ! -d $egs ]; then
+  echo "$0: expected directory $egs to exist.  Run the get_egs.sh commands in the"
+  echo "    run.sh before this script."
+  exit 1
+fi
+
+# check that the expected files are in the egs directory.
+
+for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
+         $egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
+         $egs/info/output_dim; do
+  if [ ! -e $f ]; then
+    echo "$0: expected file $f to exist."
+    exit 1;
+  fi
+done
+
+
+mkdir -p $dir/log
+
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(cat $egs/info/output_dim)
+
+  # Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
+  # images.
+
+
+  nf1=48
+  nf2=96
+  nf3=512
+  nb3=256
+
+  a="num-minibatches-history=40.0"
+  common="$a required-time-offsets=0 height-offsets=-1,0,1"
+  res_opts="$a bypass-source=batchnorm"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=96 name=input
+  conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
+  res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
+  res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
+  conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
+  res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
+  res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
+  conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
+  res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
+  res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
+  res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
+  channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
+  output-layer name=output learning-rate-factor=0.1 dim=$num_targets
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 2 ]; then
+
+  steps/nnet3/train_raw_dnn.py --stage=$train_stage \
+    --cmd="$train_cmd" \
+    --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --rotation-degree=30 --rotation-prob=0.5 --num-channels=3" \
+    --trainer.srand=$srand \
+    --trainer.max-param-change=2.0 \
+    --trainer.num-epochs=140 \
+    --egs.frames-per-eg=1 \
+    --trainer.optimization.num-jobs-initial=1 \
+    --trainer.optimization.num-jobs-final=2 \
+    --trainer.optimization.initial-effective-lrate=0.003 \
+    --trainer.optimization.final-effective-lrate=0.0003 \
+    --trainer.optimization.minibatch-size=256,128,64 \
+    --trainer.optimization.proportional-shrink=50.0 \
+    --trainer.shuffle-buffer-size=2000 \
+    --egs.dir="$egs" \
+    --use-gpu=true \
+    --reporting.email="$reporting_email" \
+    --dir=$dir  || exit 1;
+fi
+
+
+exit 0;
diff --git a/egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -41,30 +41,42 @@ done
 # Set various variables.
 mkdir -p $dir/log
 mkdir -p $data_out
-featdir=${PWD}/$dir
+featdir=$(utils/make_absolute.sh $dir)
 
-cp $data_in/utt2spk $data_out/utt2spk
-cp $data_in/spk2utt $data_out/spk2utt
-cp $data_in/wav.scp $data_out/wav.scp
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then  # CLSP hosts only; skip if $featdir/storage already exists
+  utils/create_split_dir.pl \
+    /export/b{14,15,16,17}/$USER/kaldi-data/egs/sre16/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage
+fi
 
 for n in $(seq $nj); do
   # the next command does nothing unless $featdir/storage/ exists, see
   # utils/create_data_link.pl for more info.
-  utils/create_data_link.pl $featdir/xvector_feats_${name}.$n.ark
+  utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark
 done
 
+cp $data_in/utt2spk $data_out/utt2spk
+cp $data_in/spk2utt $data_out/spk2utt
+cp $data_in/wav.scp $data_out/wav.scp
+
+write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB"
+
 sdata_in=$data_in/split$nj;
 utils/split_data.sh $data_in $nj || exit 1;
 
 $cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \
   apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \
   scp:${sdata_in}/JOB/feats.scp ark:- \| \
   select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \
-  copy-feats --compress=$compress ark:- \
+  copy-feats --compress=$compress $write_num_frames_opt ark:- \
   ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1;
 
 for n in $(seq $nj); do
   cat $featdir/xvector_feats_${name}.$n.scp || exit 1;
 done > ${data_out}/feats.scp || exit 1
 
+for n in $(seq $nj); do
+  cat $featdir/log/utt2num_frames.$n || exit 1;
+done > $data_out/utt2num_frames || exit 1
+rm $featdir/log/utt2num_frames.*
+
 echo "$0: Succeeded creating xvector features for $name"