
nnet3-rnnlm lattice rescoring draft #1906

Merged: 27 commits, merged Nov 23, 2017. Changes shown below are from 1 commit.

Commits:
0d839b0  draft (hainan-xv, Sep 18, 2017)
699c956  lattice-rescoring draft finished (hainan-xv, Sep 20, 2017)
ef09b62  lattice-rescoring runnable but buggy (hainan-xv, Sep 22, 2017)
390a1bb  making a PR (hainan-xv, Sep 24, 2017)
dc49709  small changes (hainan-xv, Sep 24, 2017)
8a33e77  include lmrescore_rnn_lat.sh (hainan-xv, Sep 24, 2017)
5965b87  Merge branch 'rnnlm' into rnnlm-rescoring (hainan-xv, Sep 24, 2017)
483450d  some aesthetic changes; not final yet (hainan-xv, Sep 25, 2017)
00912f7  Merge branch 'master' into rnnlm-rescoring (hainan-xv, Sep 25, 2017)
b1167a2  cached version of lattice rescoring; buggy it seems (hainan-xv, Sep 27, 2017)
a52da29  purely aesthetic changes (hainan-xv, Sep 27, 2017)
3bdaa4d  re-written some of the classes (hainan-xv, Sep 28, 2017)
2b08335  very small changes (hainan-xv, Sep 28, 2017)
7cf4af8  fix a typo (hainan-xv, Sep 28, 2017)
8f35242  make RNNLM share the same FST wordlist (hainan-xv, Oct 2, 2017)
705ecc8  fix small issue when running lattice-rescoring with normalize-probs o… (hainan-xv, Oct 2, 2017)
d19ecc1  minor changes (hainan-xv, Oct 6, 2017)
232ef04  fix small stylistic issues in code (hainan-xv, Oct 14, 2017)
bd9936b  fix wrong variable used in scripts/rnnlm/lmrescore_rnnlm_lat.sh (hainan-xv, Oct 14, 2017)
9cc7ba1  add rnnlm softlink in swbd/s5c (hainan-xv, Oct 15, 2017)
267177f  small style changes (hainan-xv, Oct 30, 2017)
87f2f6c  merge with latest upstream (hainan-xv, Nov 3, 2017)
c9bf5e0  move rescoring into rnnlm training scripts (hainan-xv, Nov 7, 2017)
091d4d5  move rescoring into rnnlm training scripts (hainan-xv, Nov 8, 2017)
a192ada  fix small issues mentioned by @danoneata (hainan-xv, Nov 9, 2017)
697f219  change SWBD script to accommodate s5_c; add paper link to RNNLM scrip… (hainan-xv, Nov 20, 2017)
acb5211  fix conflicts (hainan-xv, Nov 20, 2017)
Commit dc4970981377e926f979f094e51d7cd07636a085 ("small changes"), committed by hainan-xv on Sep 24, 2017.
19 changes: 2 additions & 17 deletions egs/swbd/s5/local/rnnlm/run_rescoring.sh
```diff
@@ -11,36 +11,21 @@
 id=rnn

 set -e

-#[ ! -f $rnndir/rnnlm ] && echo "Can't find RNNLM model" && exit 1;
-
 LM=fsh_sw1_tg
-rnndir=exp/rnnlm_lstm_h650_a
+rnndir=exp/rnnlm_lstm_d

-#ln -s final.raw $rnndir/rnnlm 2>/dev/null
 touch $rnndir/unk.probs

 for decode_set in eval2000; do
   dir=exp/chain/tdnn_lstm_1e_sp
   decode_dir=${dir}/decode_${decode_set}_$LM

-  # N-best rescoring
-  # steps/rnnlmrescore.sh \
-  #   --rnnlm-ver nnet3 \
-  #   --N $n --cmd "$decode_cmd --mem 16G" --inv-acwt 10 0.5 \
-  #   data/lang_$LM $rnndir \
-  #   data/$mic/$decode_set ${decode_dir} \
-  #   ${decode_dir}.$id.$n-best &
-  #
-  # continue
-
-  # will implement later
   # Lattice rescoring
   steps/lmrescore_rnnlm_lat.sh \
     --cmd "$decode_cmd --mem 16G" \
     --rnnlm-ver kaldirnnlm --weight 0.5 --max-ngram-order $ngram_order \
     data/lang_$LM $rnndir \
     data/${decode_set}_hires ${decode_dir} \
-    ${decode_dir}.rnnlm.keli.nnet3rnnlm.lat.${ngram_order}gram
+    ${decode_dir}.nnet3rnnlm.lat.${ngram_order}gram

 done
```
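The --max-ngram-order option passed to steps/lmrescore_rnnlm_lat.sh controls the usual n-gram approximation in RNNLM lattice rescoring: partial paths whose most recent (n - 1) words agree are treated as having the same history and share one RNNLM state, which bounds the number of states kept per lattice. A toy sketch of that idea, with made-up histories and plain awk rather than the actual Kaldi implementation:

```shell
#!/bin/sh
# Illustrative sketch (toy data, not the Kaldi code) of the n-gram history
# approximation: paths whose last (max_ngram_order - 1) words agree would
# share one cached RNNLM hidden state.
max_ngram_order=3

# Three partial paths through an imaginary lattice; two of them end in "b c".
histories="<s> a b c
<s> x b c
<s> a b d"

# Truncate each history to its last (n - 1) words and count distinct keys;
# each distinct key corresponds to one RNNLM state that must be kept.
n_states=$(printf '%s\n' "$histories" \
  | awk -v n=$((max_ngram_order - 1)) \
      '{ s = ""; for (i = NF - n + 1; i <= NF; i++) s = s " " $i; print s }' \
  | sort -u | wc -l | tr -d ' ')

echo "distinct RNNLM states kept: $n_states"
```

With order 3, the first two histories collapse to the single key "b c", so only two states survive instead of three.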
2 changes: 1 addition & 1 deletion egs/swbd/s5/local/score.sh
```diff
@@ -31,7 +31,7 @@ data=$1

 if [ -f $data/stm ]; then # use sclite scoring.
   echo "$data/stm exists: using local/score_sclite.sh"
-  eval local/score_sclite.sh $orig_args
+  eval local/score_sclite.sh "$orig_args"
```
Contributor:
I just noticed that all of these changes are in swbd/s5, which is super outdated; you should be using s5c. I doubt that this problem (if there was a problem) occurs in the latest script. In any case, let me know what the problem was, because I'd be surprised if this was really a bug, this script being so old.

Contributor (author):
IIRC, if I don't add the quotes and $orig_args contains something like --cmd "queue.pl --mem 8G", it'll complain.
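The fragility the author describes comes from how the shell handles quote characters embedded in a variable: plain word-splitting treats them as literal text, while eval re-parses them as shell quoting. A minimal sketch with a hypothetical argument string (not the actual score.sh flow):

```shell
#!/bin/sh
# Minimal sketch (hypothetical argument string) of why values such as
# --cmd "queue.pl --mem 8G" need care: unquoted expansion splits on every
# space and leaves the quote characters literal, whereas eval re-parses
# the string so the quoted part stays one word.
nargs() { echo $#; }

orig_args='--cmd "queue.pl --mem 8G"'

split_plain=$(nargs $orig_args)      # 4 words: --cmd  "queue.pl  --mem  8G"
split_eval=$(eval nargs $orig_args)  # 2 words: --cmd  queue.pl --mem 8G

echo "plain splitting: $split_plain args; with eval: $split_eval args"
```

This is why scripts that forward option strings containing quoted sub-commands tend to route them through eval and quote carefully.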

```diff
 else
   echo "$data/stm does not exist: using local/score_basic.sh"
   eval local/score_basic.sh $orig_args
```
8 changes: 7 additions & 1 deletion scripts/rnnlm/train_rnnlm.sh
```diff
@@ -64,16 +64,19 @@ num_splits=$(cat $dir/text/info/num_splits)
 num_repeats=$(cat $dir/text/info/num_repeats)
 text_files=$(for n in $(seq $num_splits); do echo $dir/text/$n.txt; done)
 vocab_size=$(tail -n 1 $dir/config/words.txt | awk '{print $NF + 1}')
+embedding_type=

 if [ -f $dir/feat_embedding.0.mat ]; then
   sparse_features=true
+  embedding_type=feat_embedding
```
Contributor:
Let's just make this either "feat" or "word"; remove the "_embedding".
```diff
   if [ -f $dir/word_embedding.0.mat ]; then
     echo "$0: error: $dir/feat_embedding.0.mat and $dir/word_embedding.0.mat both exist."
     exit 1;
   fi
   ! [ -f $dir/word_feats.txt ] && echo "$0: expected $dir/word_feats.txt to exist" && exit 1;
 else
   sparse_features=false
+  embedding_type=word_embedding
   ! [ -f $dir/word_embedding.0.mat ] && \
     echo "$0: expected $dir/word_embedding.0.mat to exist" && exit 1
 fi
```
```diff
@@ -192,7 +195,7 @@ while [ $x -lt $num_iters ]; do
   [ -f $dir/.train_error ] && \
     echo "$0: failure on iteration $x of training, see $dir/log/train.$x.*.log for details." && exit 1
   if [ $this_num_jobs -gt 1 ]; then
-    # average the models and the embedding matrces. Use run.pl as we don't
+    # average the models and the embedding matrces. Use run.pl as we don\'t
     # want this to wait on the queue (if there is a queue).
     src_models=$(for n in $(seq $this_num_jobs); do echo $dir/$[x+1].$n.raw; done)
     src_matrices=$(for n in $(seq $this_num_jobs); do echo $dir/${embedding_type}.$[x+1].$n.mat; done)
```
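The averaging that the comment above refers to can be pictured with plain awk arithmetic; the numbers below are made up and this is only an illustration, not Kaldi's actual nnet3 model-averaging tools:

```shell
#!/bin/sh
# Toy illustration (made-up parameters, not Kaldi's binaries) of the
# averaging step: each of this_num_jobs parallel jobs produces its own
# parameters, and the next iteration starts from their element-wise mean.
avg=$(printf '%s\n' "1.0 3.0" "3.0 5.0" \
  | awk '{ for (i = 1; i <= NF; i++) sum[i] += $i; if (NF > m) m = NF; n++ }
         END { for (i = 1; i <= m; i++) printf "%s ", sum[i] / n }')

echo "averaged parameters: $avg"
```

Here two jobs contribute (1.0, 3.0) and (3.0, 5.0), so the averaged parameters are (2, 4).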
```diff
@@ -218,8 +221,11 @@ if [ $stage -le $num_iters ]; then
   echo "$0: best iteration (out of $num_iters) was $best_iter, linking it to final iteration."
   ln -sf $embedding_type.$best_iter.mat $dir/$embedding_type.final.mat
   ln -sf $best_iter.raw $dir/final.raw
+  ln -sf $best_iter.raw $dir/rnnlm  # to make it consistent with other RNNLMs
 fi

+touch $dir/unk.probs
```
Contributor:
Once we modify this setup to have its own rescoring scripts, unk.probs may no longer be needed, but I may merge this as-is for now.

```diff
 # Now get some diagnostics about the evolution of the objective function.
 if [ $stage -le $[num_iters+1] ]; then
   (
```
8 changes: 4 additions & 4 deletions src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
```diff
@@ -40,11 +40,11 @@ int main(int argc, char *argv[]) {
         "composing with the wrapped LM using a special type of composition\n"
         "algorithm. Determinization will be applied on the composed lattice.\n"
         "\n"
-        "Usage: lattice-lmrescore-nnet3-rnnlm [options] <rnnlm-wordlist> \\\n"
+        "Usage: lattice-lmrescore-kaldi-rnnlm [options] <embedding-file> <rnnlm-wordlist> \\\n"
         "       <word-symbol-table-rxfilename> <lattice-rspecifier> \\\n"
-        "       <rnnlm-rxfilename> <lattice-wspecifier>\n"
-        " e.g.: lattice-lmrescore-nnet3-rnnlm --lm-scale=-1.0 words.txt \\\n"
-        "       ark:in.lats rnnlm ark:out.lats\n";
+        "       <raw-rnnlm-rxfilename> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-kaldi-rnnlm --lm-scale=-1.0 word_embedding.mat \\\n"
+        "       rnn_words.txt fst_words.txt ark:in.lats rnnlm ark:out.lats\n";

     ParseOptions po(usage);
     int32 max_ngram_order = 3;
```
1 change: 1 addition & 0 deletions src/rnnlm/rnnlm-decodable-simple-looped.cc
```diff
@@ -2,6 +2,7 @@

 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
 //                2017 Yiming Wang
+//                2017 Hainan Xu

 // See ../../COPYING for clarification regarding multiple authors
 //
```
1 change: 1 addition & 0 deletions src/rnnlm/rnnlm-decodable-simple-looped.h
```diff
@@ -2,6 +2,7 @@

 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
 //                2017 Yiming Wang
+//                2017 Hainan Xu

 // See ../../COPYING for clarification regarding multiple authors
 //
```