diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
index 56ad4d043c3..8dd876c2b2c 100755
--- a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
@@ -34,18 +34,25 @@ final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
 if [ $stage -le 3 ]; then
-# for decode_set in dev; do
   for decode_set in dev eval; do
     basedir=exp/$mic/nnet3/tdnn_sp/
     decode_dir=${basedir}/decode_${decode_set}
 
-    # Lattice rescoring
-    steps/lmrescore_rnnlm_lat.sh \
-      --cmd "$tfrnnlm_cmd --mem 16G" \
-      --rnnlm-ver tensorflow --weight $weight --max-ngram-order $ngram_order \
+    # pruned lattice rescoring
+    steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh \
+      --cmd "$tfrnnlm_cmd --mem 4G" \
+      --weight $weight --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.unk.fast.tfrnnlm.lat.${ngram_order}gram.$weight &
+      ${decode_dir}_tfrnnlm_lat_${ngram_order}gram &
+
+    # Lattice rescoring, unpruned (slow) version
+#   steps/tfrnnlm/lmrescore_rnnlm_lat.sh \
+#     --cmd "$tfrnnlm_cmd --mem 4G" \
+#     --weight $weight --max-ngram-order $ngram_order \
+#     data/lang_$LM $dir \
+#     data/$mic/${decode_set}_hires ${decode_dir} \
+#     ${decode_dir}_lat_${ngram_order}gram_unpruned &
   done
 fi

diff --git a/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh b/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh
index fead2a12152..8367029adaa 100755
--- a/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh
+++ b/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh
@@ -23,6 +23,7 @@ ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
               # if it's set, it merges histories in the lattice if they share
               # the same ngram history and this prevents the lattice from
               # exploding exponentially
+pruned_rescore=true
 
 . cmd.sh
 . utils/parse_options.sh
@@ -95,12 +96,17 @@ fi
 if [ $stage -le 4 ] && $run_rescore; then
   echo "$0: Perform lattice-rescoring on $ac_model_dir"
-  LM=sw1_fsh_fg
+  LM=sw1_fsh_fg # using the 4-gram const arpa file as old lm
+# LM=sw1_tg # if using the original 3-gram G.fst as old lm
+  pruned=
+  if $pruned_rescore; then
+    pruned=_pruned
+  fi
   for decode_set in eval2000; do
     decode_dir=${ac_model_dir}/decode_${decode_set}_${LM}_looped
 
     # Lattice rescoring
-    rnnlm/lmrescore.sh \
+    rnnlm/lmrescore$pruned.sh \
       --cmd "$decode_cmd --mem 4G" \
       --weight 0.5 --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
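With pruned_rescore=true, the rescoring loop above resolves to one call per decode set along these lines (a sketch; the RNNLM directory exp/rnnlm_lstm_1e and the decode-directory names are illustrative stand-ins for $dir and $decode_dir in the script):

    rnnlm/lmrescore_pruned.sh \
      --cmd "run.pl --mem 4G" \
      --weight 0.5 --max-ngram-order 4 \
      data/lang_sw1_fsh_fg exp/rnnlm_lstm_1e \
      data/eval2000_hires \
      exp/chain/tdnn_lstm_1e_sp/decode_eval2000_sw1_fsh_fg_looped \
      exp/chain/tdnn_lstm_1e_sp/decode_eval2000_sw1_fsh_fg_looped_rnnlm_pruned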
diff --git a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
index f38fb2628c8..1dbcbe1a192 100755
--- a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
+++ b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
@@ -11,12 +11,10 @@
 cmd=run.pl
 skip_scoring=false
 max_ngram_order=4
-N=10
-inv_acwt=12
-weight=1.0 # Interpolation weight for RNNLM.
-# End configuration section.
+acwt=0.1
+weight=0.5 # Interpolation weight for RNNLM.
 rnnlm_ver=
-#layer_string=
+# End configuration section.
 
 echo "$0 $@" # Print the command line for logging
@@ -56,11 +54,6 @@
 if [ "$rnnlm_ver" == "cuedrnnlm" ]; then
   first_arg=$rnnlm_dir/rnn.wlist
 fi
 
-if [ "$rnnlm_ver" == "tensorflow" ]; then
-  rescoring_binary="lattice-lmrescore-tf-rnnlm"
-  first_arg="$rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final"
-fi
-
 oldlm=$oldlang/G.fst
 if [ -f $oldlang/G.carpa ]; then
   oldlm=$oldlang/G.carpa
 elif [ ! -f $oldlm ]; then
   echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
     exit 1;
 fi
 
 [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
-[ ! -f $rnnlm_dir/rnnlm ] && [ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
+[ ! -f $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
 [ ! -f $rnnlm_dir/unk.probs ] &&\
   echo "$0: Missing file $rnnlm_dir/unk.probs" && exit 1;
 [ ! -f $oldlang/words.txt ] &&\
@@ -83,8 +76,6 @@ awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
 
 oldlm_command="fstproject --project_output=true $oldlm |"
 
-acwt=`perl -e "print (1.0/$inv_acwt);"`
-
 mkdir -p $outdir/log
 nj=`cat $indir/num_jobs` || exit 1;
 cp $indir/num_jobs $outdir
@@ -112,7 +103,7 @@
 if ! $skip_scoring ; then
   [ ! -x local/score.sh ] && echo $err_msg && exit 1;
   local/score.sh --cmd "$cmd" $data $oldlang $outdir
 else
-  echo "Not scoring because requested so..."
+  echo "$0: Not scoring because --skip-scoring was specified."
 fi
 
 exit 0;
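A note on the new defaults above: the input lattice already carries the old LM score at scale 1.0, and the rescoring pipeline first subtracts the old LM at scale $weight and then adds the RNNLM at scale $weight, so in log space each path ends up scored as

    graph_score = (1 - weight) * log P_old(W) + weight * log P_rnnlm(W) + (non-LM graph costs)

That is, --weight 0.5 gives an equal-weight interpolation of the two LMs, a safer default than the previous 1.0, which discarded the old LM score entirely.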
diff --git a/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat.sh
new file mode 100644
index 00000000000..b84d9a0eef7
--- /dev/null
+++ b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+# Copyright 2015 Guoguo Chen
+#           2017 Hainan Xu
+# Apache 2.0
+
+# This script rescores lattices with an RNNLM trained with TensorFlow.
+# A faster and more accurate version of the algorithm is in
+# steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh, which is preferred.
+# An example recipe that uses this script is egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+
+# Begin configuration section.
+cmd=run.pl
+skip_scoring=false
+max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order.
+                  # If it is set, histories in the lattice are merged when they share
+                  # the same n-gram history; this prevents the lattice from
+                  # exploding exponentially. Details of the n-gram approximation
+                  # method are described in section 2.3 of the paper
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+weight=0.5 # Interpolation weight for RNNLM.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+. ./utils/parse_options.sh
+
+if [ $# != 5 ]; then
+  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
+  echo "with TensorFlow RNNLM."
+  echo ""
+  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
+  echo "          <data-dir> <input-decode-dir> <output-decode-dir>"
+  echo " e.g.: $0 data/lang_tg data/tensorflow_lstm data/test \\"
+  echo "          exp/tri3/test_tg exp/tri3/test_tfrnnlm"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
+  exit 1;
+fi
+
+[ -f path.sh ] && . ./path.sh;
+
+oldlang=$1
+rnnlm_dir=$2
+data=$3
+indir=$4
+outdir=$5
+
+oldlm=$oldlang/G.fst
+if [ -f $oldlang/G.carpa ]; then
+  oldlm=$oldlang/G.carpa
+elif [ ! -f $oldlm ]; then
+  echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
+    exit 1;
+fi
+
+echo "$0: using $oldlm as old LM"
+
+[ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing tf model folder $rnnlm_dir/rnnlm" && exit 1;
+
+for f in $rnnlm_dir/unk.probs $oldlang/words.txt $indir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: Missing file $f" && exit 1
+done
+
+awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
+  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
+  || exit 1;
+
+oldlm_command="fstproject --project_output=true $oldlm |"
+
+mkdir -p $outdir/log
+nj=`cat $indir/num_jobs` || exit 1;
+cp $indir/num_jobs $outdir
+
+oldlm_weight=`perl -e "print -1.0 * $weight;"`
+if [ "$oldlm" == "$oldlang/G.fst" ]; then
+  $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+    lattice-lmrescore --lm-scale=$oldlm_weight \
+    "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \
+    lattice-lmrescore-tf-rnnlm --lm-scale=$weight \
+    --max-ngram-order=$max_ngram_order \
+    $rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
+    "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+else
+  $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+    lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \
+    "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm" ark:- \| \
+    lattice-lmrescore-tf-rnnlm --lm-scale=$weight \
+    --max-ngram-order=$max_ngram_order \
+    $rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
+    "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+fi
+
+if ! $skip_scoring ; then
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
+  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
+  local/score.sh --cmd "$cmd" $data $oldlang $outdir
+else
+  echo "$0: Not scoring because --skip-scoring was specified."
+fi
+
+exit 0;
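Expanded, each parallel job in the G.fst branch of the script above runs a pipeline like this when invoked directly, i.e. without the queue escaping (a sketch for JOB=1 with weight 0.5; all paths are illustrative):

    lattice-lmrescore --lm-scale=-0.5 \
      "ark:gunzip -c exp/tri3/test_tg/lat.1.gz|" \
      "fstproject --project_output=true data/lang_tg/G.fst |" ark:- | \
    lattice-lmrescore-tf-rnnlm --lm-scale=0.5 --max-ngram-order=4 \
      data/tensorflow_lstm/unk.probs data/tensorflow_lstm/wordlist.rnn.final \
      data/lang_tg/words.txt ark:- data/tensorflow_lstm/rnnlm \
      "ark,t:|gzip -c > exp/tri3/test_tfrnnlm/lat.1.gz"

The first binary removes the old LM scores (note the negative scale); the second adds the weighted TensorFlow-RNNLM scores.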
diff --git a/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh
new file mode 100755
index 00000000000..e098aef85df
--- /dev/null
+++ b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+# Copyright 2015 Guoguo Chen
+#           2017 Hainan Xu
+# Apache 2.0
+
+# This script rescores lattices with an RNNLM trained with TensorFlow.
+# It uses a pruned algorithm to speed up the runtime and improve the accuracy;
+# it is an improved version of steps/tfrnnlm/lmrescore_rnnlm_lat.sh and uses
+# the exact same interface. The details of the pruning algorithm are described in
+# http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+# An example recipe that uses this script is egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+
+# Begin configuration section.
+cmd=run.pl
+skip_scoring=false
+max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order.
+                  # If it is set, histories in the lattice are merged when they share
+                  # the same n-gram history; this prevents the lattice from
+                  # exploding exponentially. Details of the n-gram approximation
+                  # method are described in section 2.3 of the paper
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+acwt=0.1
+weight=0.5 # Interpolation weight for RNNLM.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+. ./utils/parse_options.sh
+
+if [ $# != 5 ]; then
+  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
+  echo "with TensorFlow RNNLM."
+  echo ""
+  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
+  echo "          <data-dir> <input-decode-dir> <output-decode-dir>"
+  echo " e.g.: $0 data/lang_tg data/tensorflow_lstm data/test \\"
+  echo "          exp/tri3/test_tg exp/tri3/test_tfrnnlm"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
+  exit 1;
+fi
+
+[ -f path.sh ] && . ./path.sh;
+
+oldlang=$1
+rnnlm_dir=$2
+data=$3
+indir=$4
+outdir=$5
+
+oldlm=$oldlang/G.fst
+carpa_option=
+
+if [ -f $oldlang/G.carpa ]; then
+  oldlm=$oldlang/G.carpa
+  carpa_option="--use-const-arpa=true"
+elif [ ! -f $oldlm ]; then
+  echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
+    exit 1;
+fi
+
+echo "$0: using $oldlm as old LM"
+
+[ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing tf model folder $rnnlm_dir/rnnlm" && exit 1;
+
+for f in $rnnlm_dir/unk.probs $oldlang/words.txt $indir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: Missing file $f" && exit 1
+done
+
+awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
+  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
+  || exit 1;
+
+mkdir -p $outdir/log
+nj=`cat $indir/num_jobs` || exit 1;
+cp $indir/num_jobs $outdir
+
+$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+  lattice-lmrescore-tf-rnnlm-pruned --lm-scale=$weight \
+  --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order \
+  $carpa_option $oldlm $oldlang/words.txt \
+  $rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final "$rnnlm_dir/rnnlm" \
+  "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+
+if ! $skip_scoring ; then
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
+  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
+  local/score.sh --cmd "$cmd" $data $oldlang $outdir
+else
+  echo "$0: Not scoring because --skip-scoring was specified."
+fi
+
+exit 0;

diff --git a/scripts/rnnlm/lmrescore.sh b/scripts/rnnlm/lmrescore.sh
index 84f42443710..cd0cf793d8d 100755
--- a/scripts/rnnlm/lmrescore.sh
+++ b/scripts/rnnlm/lmrescore.sh
@@ -5,6 +5,9 @@
 # Apache 2.0
 
 # This script rescores lattices with KALDI RNNLM.
+# It uses a simple n-gram approximation to limit the search space;
+# a faster and more accurate way to rescore is rnnlm/lmrescore_pruned.sh,
+# which is preferred.
 
 # Begin configuration section.
 cmd=run.pl
@@ -14,14 +17,14 @@ max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order
                   # if it's set, it merges histories in the lattice if they share
                   # the same ngram history and this prevents the lattice from
                   # exploding exponentially. Details of the n-gram approximation
                   # method are described in section 2.3 of the paper
-                  # http://www.cs.jhu.edu/~hxu/tf.pdf
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
 weight=0.5 # Interpolation weight for RNNLM.
 normalize=false # If true, we add a normalization step to the output of the RNNLM
                 # so that it adds up to *exactly* 1. Note that this is not necessary
                 # as in our RNNLM setup, a properly trained network would automatically
                 # have its normalization term close to 1. The details of this
-                # could be found at http://www.cs.jhu.edu/~hxu/rnnlm.pdf
+                # can be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf
 
 # End configuration section.
@@ -109,11 +112,11 @@
 if ! $skip_scoring ; then
-  err_msg="Not scoring because local/score.sh does not exist or not executable."
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
   [ ! -x local/score.sh ] && echo $err_msg && exit 1;
   local/score.sh --cmd "$cmd" $data $oldlang $outdir
 else
-  echo "Not scoring because requested so..."
+  echo "$0: Not scoring because --skip-scoring was specified."
 fi
 
 exit 0;
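To make the max-ngram-order approximation concrete: as implemented in the rescoring FSTs, each lattice state keeps at most max_ngram_order - 1 words of RNNLM history as its identity. With --max-ngram-order 4, two partial paths ending in "...so he went to the" and "...then they went to the" are collapsed into a single state keyed on "went to the", and the RNNLM is evaluated only once from that point on.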
diff --git a/scripts/rnnlm/lmrescore_pruned.sh b/scripts/rnnlm/lmrescore_pruned.sh
new file mode 100755
index 00000000000..e757508990b
--- /dev/null
+++ b/scripts/rnnlm/lmrescore_pruned.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+# Copyright 2017 Hainan Xu
+# Apache 2.0
+
+# This script rescores lattices with KALDI RNNLM using a pruned algorithm.
+# The details of the algorithm can be found at
+# http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+# One example script for this is at egs/swbd/s5c/local/rnnlm/run_lstm.sh
+
+# Begin configuration section.
+cmd=run.pl
+skip_scoring=false
+max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order.
+                  # If it is set, histories in the lattice are merged when they share
+                  # the same n-gram history; this prevents the lattice from
+                  # exploding exponentially. Details of the n-gram approximation
+                  # method are described in section 2.3 of the paper
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+
+acwt=0.1
+weight=0.5 # Interpolation weight for RNNLM.
+normalize=false # If true, we add a normalization step to the output of the RNNLM
+                # so that it adds up to *exactly* 1. Note that this is not necessary
+                # as in our RNNLM setup, a properly trained network would automatically
+                # have its normalization term close to 1. The details of this
+                # can be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf
+
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+. ./utils/parse_options.sh
+
+if [ $# != 5 ]; then
+  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
+  echo "with Kaldi RNNLM using a pruned algorithm. See comments in file for details"
+  echo ""
+  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
+  echo "          <data-dir> <input-decode-dir> <output-decode-dir>"
+  echo " e.g.: $0 data/lang_tg exp/rnnlm_lstm/ data/test \\"
+  echo "          exp/tri3/test_tg exp/tri3/test_rnnlm_4gram"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
+  exit 1;
+fi
+
+[ -f path.sh ] && . ./path.sh;
+
+oldlang=$1
+rnnlm_dir=$2
+data=$3
+indir=$4
+outdir=$5
+
+oldlm=$oldlang/G.fst
+carpa_option=
+if [ ! -f $oldlm ]; then
+  echo "$0: file $oldlm not found; looking for $oldlang/G.carpa"
+  oldlm=$oldlang/G.carpa
+  carpa_option="--use-const-arpa=true"
+fi
+
+[ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
+[ ! -f $rnnlm_dir/final.raw ] && echo "$0: Missing file $rnnlm_dir/final.raw" && exit 1;
+[ ! -f $rnnlm_dir/feat_embedding.final.mat ] && [ ! -f $rnnlm_dir/word_embedding.final.mat ] && echo "$0: Missing word embedding file" && exit 1;
+
+[ ! -f $oldlang/words.txt ] &&\
+  echo "$0: Missing file $oldlang/words.txt" && exit 1;
+! ls $indir/lat.*.gz >/dev/null &&\
+  echo "$0: No lattices in input directory $indir" && exit 1;
+awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
+  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
+  || exit 1;
+
+normalize_opt=
+if $normalize; then
+  normalize_opt="--normalize-probs=true"
+fi
+special_symbol_opts=$(cat $rnnlm_dir/special_symbol_opts.txt)
+
+word_embedding=
+if [ -f $rnnlm_dir/word_embedding.final.mat ]; then
+  word_embedding=$rnnlm_dir/word_embedding.final.mat
+else
+  word_embedding="'rnnlm-get-word-embedding $rnnlm_dir/word_feats.txt $rnnlm_dir/feat_embedding.final.mat -|'"
+fi
+
+mkdir -p $outdir/log
+nj=`cat $indir/num_jobs` || exit 1;
+cp $indir/num_jobs $outdir
+
+$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+  lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=$weight $special_symbol_opts \
+  --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order $normalize_opt \
+  $carpa_option $oldlm $word_embedding "$rnnlm_dir/final.raw" \
+  "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+
+if ! $skip_scoring ; then
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
+  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
+  echo local/score.sh --cmd "$cmd" $data $oldlang $outdir
+  local/score.sh --cmd "$cmd" $data $oldlang $outdir
+else
+  echo "$0: Not scoring because --skip-scoring was specified."
+fi
+
+exit 0;
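Expanded, a single job of the command above looks like the following when only feat_embedding.final.mat exists (a sketch; the --bos-symbol/--eos-symbol values come from special_symbol_opts.txt and all paths are illustrative):

    lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \
      --bos-symbol=1 --eos-symbol=2 \
      --acoustic-scale=0.1 --max-ngram-order=4 \
      data/lang_tg/G.fst \
      "rnnlm-get-word-embedding exp/rnnlm_lstm/word_feats.txt exp/rnnlm_lstm/feat_embedding.final.mat -|" \
      exp/rnnlm_lstm/final.raw \
      "ark:gunzip -c exp/tri3/test_tg/lat.1.gz|" \
      "ark,t:|gzip -c > exp/tri3/test_rnnlm_4gram/lat.1.gz"

Note how the word embedding matrix can be generated on the fly through a Kaldi rxfilename pipe.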
diff --git a/src/fstext/deterministic-fst-inl.h b/src/fstext/deterministic-fst-inl.h
index bbf8cf0bce1..c6f99697e00 100644
--- a/src/fstext/deterministic-fst-inl.h
+++ b/src/fstext/deterministic-fst-inl.h
@@ -160,7 +160,8 @@ template <class Arc>
 bool ComposeDeterministicOnDemandFst<Arc>::GetArc(StateId s, Label ilabel,
                                                   Arc *oarc) {
   typedef typename MapType::iterator IterType;
-  KALDI_ASSERT(ilabel != 0);
+  KALDI_ASSERT(ilabel != 0 &&
+      "This program expects epsilon-free compact lattices as input");
   KALDI_ASSERT(s < static_cast<StateId>(state_vec_.size()));
   const std::pair<StateId, StateId> pr (state_vec_[s]);

diff --git a/src/fstext/kaldi-fst-io.cc b/src/fstext/kaldi-fst-io.cc
index a863428be6c..cda146104d0 100644
--- a/src/fstext/kaldi-fst-io.cc
+++ b/src/fstext/kaldi-fst-io.cc
@@ -123,4 +123,23 @@ void WriteFstKaldi(const VectorFst<StdArc> &fst,
   fst.Write(ko.Stream(), wopts);
 }
 
+fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename) {
+  // ReadFstKaldi() will die with exception on failure.
+  fst::VectorFst<fst::StdArc> *ans = fst::ReadFstKaldi(rxfilename);
+  if (ans->Properties(fst::kAcceptor, true) == 0) {
+    // If it's not already an acceptor, project on the output, i.e. copy olabels
+    // to ilabels. Generally the G.fst's on disk will have the disambiguation
+    // symbol #0 on the input symbols of the backoff arc, and projection will
+    // replace them with epsilons, which is what is on the output symbols of
+    // those arcs.
+    fst::Project(ans, fst::PROJECT_OUTPUT);
+  }
+  if (ans->Properties(fst::kILabelSorted, true) == 0) {
+    // Make sure LM is sorted on ilabel.
+    fst::ILabelCompare<fst::StdArc> ilabel_comp;
+    fst::ArcSort(ans, ilabel_comp);
+  }
+  return ans;
+}
+
 }  // end namespace fst
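For intuition, ReadAndPrepareLmFst() does to a G.fst roughly what this OpenFst command-line pipeline does (a sketch; newer OpenFst releases spell the projection flag --project_type=output):

    fstproject --project_output=true data/lang_tg/G.fst | \
      fstarcsort --sort_type=ilabel > G_prepared.fst

Projecting onto the output side turns the #0 backoff disambiguation symbol on the input labels into epsilon, and the arc-sort on input labels is what makes the later on-demand composition lookups efficient.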
diff --git a/src/fstext/kaldi-fst-io.h b/src/fstext/kaldi-fst-io.h
index 74f84efeea2..9715d81941e 100644
--- a/src/fstext/kaldi-fst-io.h
+++ b/src/fstext/kaldi-fst-io.h
@@ -81,6 +81,10 @@ template <class Arc>
 void ReadFstKaldi(std::istream &is, bool binary,
                   VectorFst<Arc> *fst);
 
+// Reads an FST file for an LM (G.fst), makes it an acceptor,
+// and makes sure it is sorted on ilabel.
+fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename);
+
 // This is a Holder class with T = VectorFst<Arc>, that meets the requirements
 // of a Holder class as described in ../util/kaldi-holder.h. This enables us to
 // read/write collections of FSTs indexed by strings, using the Table concept (

diff --git a/src/latbin/Makefile b/src/latbin/Makefile
index 7bab32bf25e..bcffbb43168 100644
--- a/src/latbin/Makefile
+++ b/src/latbin/Makefile
@@ -25,7 +25,7 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
            lattice-determinize-phone-pruned-parallel lattice-expand-ngram \
            lattice-lmrescore-const-arpa lattice-lmrescore-rnnlm nbest-to-prons \
            lattice-arc-post lattice-determinize-non-compact lattice-lmrescore-kaldi-rnnlm \
-           lattice-lmrescore-pruned
+           lattice-lmrescore-pruned lattice-lmrescore-kaldi-rnnlm-pruned
 
 OBJFILES =
diff --git a/src/latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc b/src/latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc
new file mode 100644
index 00000000000..73895e7203f
--- /dev/null
+++ b/src/latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc
@@ -0,0 +1,209 @@
+// latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc
+
+// Copyright 2017 Johns Hopkins University (author: Daniel Povey)
+//           2017 Hainan Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "rnnlm/rnnlm-lattice-rescoring.h"
+#include "lm/const-arpa-lm.h"
+#include "util/common-utils.h"
+#include "nnet3/nnet-utils.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "lat/compose-lattice-pruned.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+    using fst::ReadFstKaldi;
+
+    const char *usage =
+        "Rescores lattice with kaldi-rnnlm. This program is called from \n"
+        "scripts/rnnlm/lmrescore_pruned.sh. An example for rescoring \n"
+        "lattices is at egs/swbd/s5c/local/rnnlm/run_lstm.sh \n"
+        "\n"
+        "Usage: lattice-lmrescore-kaldi-rnnlm-pruned [options] \\\n"
+        "             <lm-to-subtract-rxfilename> <word-embedding-rxfilename> \\\n"
+        "             <raw-rnnlm-rxfilename> <lattice-rspecifier> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \\\n"
+        "      --bos-symbol=1 --eos-symbol=2 \\\n"
+        "      data/lang_test/G.fst word_embedding.mat \\\n"
+        "      final.raw ark:in.lats ark:out.lats\n\n"
+        "       lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \\\n"
+        "      --bos-symbol=1 --eos-symbol=2 --use-const-arpa=true \\\n"
+        "      data/lang_test_fg/G.carpa word_embedding.mat \\\n"
+        "      final.raw ark:in.lats ark:out.lats\n";
+
+    ParseOptions po(usage);
+    rnnlm::RnnlmComputeStateComputationOptions opts;
+    ComposeLatticePrunedOptions compose_opts;
+
+    int32 max_ngram_order = 3;
+    BaseFloat lm_scale = 0.5;
+    BaseFloat acoustic_scale = 0.1;
+    bool use_carpa = false;
+
+    po.Register("lm-scale", &lm_scale, "Scaling factor for <lm-to-add>; its negative "
+                "will be applied to <lm-to-subtract>.");
+    po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic "
+                "probabilities (e.g. 0.1 for non-chain systems); important because "
+                "of its effect on pruning.");
+    po.Register("max-ngram-order", &max_ngram_order,
+                "If positive, allow RNNLM histories longer than this to be identified "
+                "with each other for rescoring purposes (an approximation that "
+                "saves time and reduces output lattice size).");
+    po.Register("use-const-arpa", &use_carpa, "If true, read the old-LM file "
+                "as a const-arpa file as opposed to an FST file");
+
+    opts.Register(&po);
+    compose_opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 5) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    if (opts.bos_index == -1 || opts.eos_index == -1) {
+      KALDI_ERR << "must set --bos-symbol and --eos-symbol options";
+    }
+
+    std::string lm_to_subtract_rxfilename, lats_rspecifier,
+        word_embedding_rxfilename, rnnlm_rxfilename, lats_wspecifier;
+
+    lm_to_subtract_rxfilename = po.GetArg(1),
+    word_embedding_rxfilename = po.GetArg(2);
+    rnnlm_rxfilename = po.GetArg(3);
+    lats_rspecifier = po.GetArg(4);
+    lats_wspecifier = po.GetArg(5);
+
+    // for G.fst
+    fst::ScaleDeterministicOnDemandFst *lm_to_subtract_det_scale = NULL;
+    fst::BackoffDeterministicOnDemandFst<StdArc> *lm_to_subtract_det_backoff = NULL;
+    VectorFst<StdArc> *lm_to_subtract_fst = NULL;
+
+    // for G.carpa
+    ConstArpaLm* const_arpa = NULL;
+    fst::DeterministicOnDemandFst<StdArc> *carpa_lm_to_subtract_fst = NULL;
+
+    KALDI_LOG << "Reading old LMs...";
+    if (use_carpa) {
+      const_arpa = new ConstArpaLm();
+      ReadKaldiObject(lm_to_subtract_rxfilename, const_arpa);
+      carpa_lm_to_subtract_fst = new ConstArpaLmDeterministicFst(*const_arpa);
+      lm_to_subtract_det_scale
+          = new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                   carpa_lm_to_subtract_fst);
+    } else {
+      lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
+          lm_to_subtract_rxfilename);
+      lm_to_subtract_det_backoff =
+          new fst::BackoffDeterministicOnDemandFst<StdArc>(*lm_to_subtract_fst);
+      lm_to_subtract_det_scale =
+          new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                 lm_to_subtract_det_backoff);
+    }
+
+    kaldi::nnet3::Nnet rnnlm;
+    ReadKaldiObject(rnnlm_rxfilename, &rnnlm);
+
+    KALDI_ASSERT(IsSimpleNnet(rnnlm));
+
+    CuMatrix<BaseFloat> word_embedding_mat;
+    ReadKaldiObject(word_embedding_rxfilename, &word_embedding_mat);
+
+    const rnnlm::RnnlmComputeStateInfo info(opts, rnnlm, word_embedding_mat);
+
+    // Reads and writes as compact lattice.
+    SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
+
+    int32 num_done = 0, num_err = 0;
+
+    rnnlm::KaldiRnnlmDeterministicFst* lm_to_add_orig =
+        new rnnlm::KaldiRnnlmDeterministicFst(max_ngram_order, info);
+
+    for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) {
+      fst::DeterministicOnDemandFst<StdArc> *lm_to_add =
+          new fst::ScaleDeterministicOnDemandFst(lm_scale, lm_to_add_orig);
+
+      std::string key = compact_lattice_reader.Key();
+      CompactLattice clat = compact_lattice_reader.Value();
+      compact_lattice_reader.FreeCurrent();
+
+      // Scale the acoustic scores by "acoustic_scale" before the pruned
+      // composition; the pruning depends on the relative scale of the
+      // acoustic and LM scores.  We undo this scaling on the output
+      // lattice below.
+      if (acoustic_scale != 1.0) {
+        fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &clat);
+      }
+      TopSortCompactLatticeIfNeeded(&clat);
+
+      fst::ComposeDeterministicOnDemandFst<StdArc> combined_lms(
+          lm_to_subtract_det_scale, lm_to_add);
+
+      // Composes lattice with language model.
+      CompactLattice composed_clat;
+      ComposeCompactLatticePruned(compose_opts, clat,
+                                  &combined_lms, &composed_clat);
+
+      lm_to_add_orig->Clear();
+
+      if (composed_clat.NumStates() == 0) {
+        // Something went wrong. A warning will already have been printed.
+        num_err++;
+      } else {
+        if (acoustic_scale != 1.0) {
+          if (acoustic_scale == 0.0)
+            KALDI_ERR << "Acoustic scale cannot be zero.";
+          fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale),
+                            &composed_clat);
+        }
+        compact_lattice_writer.Write(key, composed_clat);
+        num_done++;
+      }
+      delete lm_to_add;
+    }
+
+    delete lm_to_subtract_fst;
+    delete lm_to_add_orig;
+    delete lm_to_subtract_det_backoff;
+    delete lm_to_subtract_det_scale;
+
+    delete const_arpa;
+    delete carpa_lm_to_subtract_fst;
+
+    KALDI_LOG << "Overall, succeeded for " << num_done
+              << " lattices, failed for " << num_err;
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
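The pruned composition has its own tuning knobs exposed through ComposeLatticePrunedOptions; they can be passed straight through the binary if speed or output size is a concern. A sketch (the option names here are assumptions taken from how lat/compose-lattice-pruned.h registers them; verify against the binary's --help):

    lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \
      --bos-symbol=1 --eos-symbol=2 \
      --lattice-compose-beam=4.0 --max-arcs=60000 \
      data/lang_test/G.fst word_embedding.mat final.raw \
      ark:in.lats ark:out.lats

A smaller compose beam or arc limit trades some rescoring accuracy for speed and smaller output lattices.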
diff --git a/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc b/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
index 22f713b5620..fc1034ac5c7 100644
--- a/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
+++ b/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
@@ -36,8 +36,8 @@
     const char *usage =
         "Rescores lattice with kaldi-rnnlm. This script is called from \n"
-        "scripts/rnnlm/lmrescore_rnnlm_lat.sh. An example for rescoring \n"
-        "lattices is at egs/swbd/s5/local/rnnlm/run_rescoring.sh \n"
+        "scripts/rnnlm/lmrescore.sh. An example for rescoring \n"
+        "lattices is at egs/swbd/s5c/local/rnnlm/run_lstm.sh \n"
         "\n"
         "Usage: lattice-lmrescore-kaldi-rnnlm [options] \\\n"
         " \\\n"

diff --git a/src/latbin/lattice-lmrescore-pruned.cc b/src/latbin/lattice-lmrescore-pruned.cc
index 567f70a5129..3f4347ee709 100644
--- a/src/latbin/lattice-lmrescore-pruned.cc
+++ b/src/latbin/lattice-lmrescore-pruned.cc
@@ -27,31 +27,6 @@
 #include "lat/lattice-functions.h"
 #include "lat/compose-lattice-pruned.h"
 
-namespace kaldi {
-
-fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename) {
-  // ReadFstKaldi() will die with exception on failure.
-  fst::VectorFst<fst::StdArc> *ans = fst::ReadFstKaldi(rxfilename);
-  if (ans->Properties(fst::kAcceptor, true) == 0) {
-    // If it's not already an acceptor, project on the output, i.e. copy olabels
-    // to ilabels. Generally the G.fst's on disk will have the disambiguation
-    // symbol #0 on the input symbols of the backoff arc, and projection will
-    // replace them with epsilons which is what is on the output symbols of
-    // those arcs.
-    fst::Project(ans, fst::PROJECT_OUTPUT);
-  }
-  if (ans->Properties(fst::kILabelSorted, true) == 0) {
-    // Make sure LM is sorted on ilabel.
-    fst::ILabelCompare<fst::StdArc> ilabel_comp;
-    fst::ArcSort(ans, ilabel_comp);
-  }
-  return ans;
-}
-
-}  // namespace kaldi
-
-
 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
@@ -61,7 +36,6 @@
     using fst::VectorFst;
     using fst::StdArc;
     using fst::ReadFstKaldi;
-    using std::unique_ptr;
 
     const char *usage =
         "This program can be used to subtract scores from one language model and\n"
@@ -110,14 +84,14 @@
     KALDI_LOG << "Reading LMs...";
-    VectorFst<StdArc> *lm_to_subtract_fst = ReadAndPrepareLmFst(
+    VectorFst<StdArc> *lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
         lm_to_subtract_rxfilename);
     VectorFst<StdArc> *lm_to_add_fst = NULL;
     ConstArpaLm const_arpa;
     if (add_const_arpa) {
       ReadKaldiObject(lm_to_add_rxfilename, &const_arpa);
     } else {
-      lm_to_add_fst = ReadAndPrepareLmFst(lm_to_add_rxfilename);
+      lm_to_add_fst = fst::ReadAndPrepareLmFst(lm_to_add_rxfilename);
     }
     fst::BackoffDeterministicOnDemandFst<StdArc> lm_to_subtract_det_backoff(
         *lm_to_subtract_fst);
diff --git a/src/tfrnnlm/tensorflow-rnnlm.cc b/src/tfrnnlm/tensorflow-rnnlm.cc
index f4bb8d8941b..4842d3fbaa8 100644
--- a/src/tfrnnlm/tensorflow-rnnlm.cc
+++ b/src/tfrnnlm/tensorflow-rnnlm.cc
@@ -307,6 +307,24 @@ TfRnnlmDeterministicFst::~TfRnnlmDeterministicFst() {
   }
 }
 
+void TfRnnlmDeterministicFst::Clear() {
+  // Similar to the destructor, but we retain the 0-th entries in each map,
+  // which correspond to the <s> state.
+  for (int i = 1; i < state_to_context_.size(); i++) {
+    delete state_to_context_[i];
+  }
+  for (int i = 1; i < state_to_cell_.size(); i++) {
+    delete state_to_cell_[i];
+  }
+
+  state_to_context_.resize(1);
+  state_to_cell_.resize(1);
+  state_to_wseq_.resize(1);
+  wseq_to_state_.clear();
+  wseq_to_state_[state_to_wseq_[0]] = 0;
+}
+
+
 fst::StdArc::Weight TfRnnlmDeterministicFst::Final(StateId s) {
   // At this point, we should have created the state.
   KALDI_ASSERT(static_cast<size_t>(s) < state_to_wseq_.size());

diff --git a/src/tfrnnlm/tensorflow-rnnlm.h b/src/tfrnnlm/tensorflow-rnnlm.h
index 33ac75fa093..4c15229fe9d 100644
--- a/src/tfrnnlm/tensorflow-rnnlm.h
+++ b/src/tfrnnlm/tensorflow-rnnlm.h
@@ -149,6 +149,7 @@ class TfRnnlmDeterministicFst:
   // Does not take ownership.
   TfRnnlmDeterministicFst(int32 max_ngram_order, KaldiTfRnnlmWrapper *rnnlm);
   ~TfRnnlmDeterministicFst();
+  void Clear();
 
   // We cannot use "const" because the pure virtual function in the interface is
   // not const.
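Clear() exists so that the pruned rescoring binaries can keep a single TfRnnlmDeterministicFst alive across the whole lattice archive: after each lattice is composed they call lm_to_add_orig->Clear() (see lattice-lmrescore-tf-rnnlm-pruned.cc below), which frees all cached TensorFlow context/cell entries except the initial <s> state. This keeps memory bounded per utterance instead of growing with the number of lattices processed, while avoiding the cost of rebuilding the wrapper and its TF session for every utterance.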
diff --git a/src/tfrnnlmbin/Makefile b/src/tfrnnlmbin/Makefile
index 2fb6014b036..f2a353c918c 100644
--- a/src/tfrnnlmbin/Makefile
+++ b/src/tfrnnlmbin/Makefile
@@ -21,7 +21,7 @@ EXTRA_CXXFLAGS = -Wno-sign-compare -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf_archive/src \
                  -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/protobuf/src
 
 include ../kaldi.mk
 
-BINFILES = lattice-lmrescore-tf-rnnlm
+BINFILES = lattice-lmrescore-tf-rnnlm lattice-lmrescore-tf-rnnlm-pruned
 
 OBJFILES =
diff --git a/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc
new file mode 100644
index 00000000000..b707ca85977
--- /dev/null
+++ b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc
@@ -0,0 +1,201 @@
+// tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc
+
+// Copyright (C) 2017 Intellisist, Inc. (Author: Hainan Xu)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "tfrnnlm/tensorflow-rnnlm.h"
+#include "util/common-utils.h"
+#include "lm/const-arpa-lm.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "lat/compose-lattice-pruned.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::tf_rnnlm;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+    using fst::ReadFstKaldi;
+
+    const char *usage =
+        "Rescores lattice with rnnlm that is trained with TensorFlow.\n"
+        "An example script for training and rescoring with the TensorFlow\n"
+        "RNNLM is at egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh\n"
+        "\n"
+        "Usage: lattice-lmrescore-tf-rnnlm-pruned [options] \\\n"
+        "             <old-lm-rxfilename> <word-symbol-table> [unk-prob-file] \\\n"
+        "             <rnn-wordlist> <rnnlm-dir> <lattice-rspecifier> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-tf-rnnlm-pruned --lm-scale=0.5 \\\n"
+        "      data/test/G.fst data/lang/words.txt data/tensorflow_lstm/unkcounts.txt \\\n"
+        "      data/tensorflow_lstm/rnnwords.txt \\\n"
+        "      data/tensorflow_lstm/rnnlm ark:in.lats ark:out.lats\n\n"
+        " e.g.: lattice-lmrescore-tf-rnnlm-pruned --lm-scale=0.5 --use-const-arpa=true \\\n"
+        "      data/test_fg/G.carpa data/lang/words.txt data/tensorflow_lstm/unkcounts.txt \\\n"
+        "      data/tensorflow_lstm/rnnwords.txt \\\n"
+        "      data/tensorflow_lstm/rnnlm ark:in.lats ark:out.lats\n";
+
+    ParseOptions po(usage);
+    int32 max_ngram_order = 3;
+    BaseFloat lm_scale = 0.5;
+    BaseFloat acoustic_scale = 0.1;
+    bool use_carpa = false;
+
+    po.Register("lm-scale", &lm_scale, "Scaling factor for <lm-to-add>; its negative "
+                "will be applied to <lm-to-subtract>.");
+    po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic "
+                "probabilities (e.g. 0.1 for non-chain systems); important because "
+                "of its effect on pruning.");
+    po.Register("max-ngram-order", &max_ngram_order,
+                "If positive, allow RNNLM histories longer than this to be identified "
+                "with each other for rescoring purposes (an approximation that "
+                "saves time and reduces output lattice size).");
+    po.Register("use-const-arpa", &use_carpa, "If true, read the old-LM file "
+                "as a const-arpa file as opposed to an FST file");
+
+    KaldiTfRnnlmWrapperOpts opts;
+    ComposeLatticePrunedOptions compose_opts;
+    opts.Register(&po);
+    compose_opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 7 && po.NumArgs() != 6) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string lm_to_subtract_rxfilename, lats_rspecifier, rnn_word_list,
+        word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier, unk_prob_file;
+    if (po.NumArgs() == 6) {
+      lm_to_subtract_rxfilename = po.GetArg(1),
+      word_symbols_rxfilename = po.GetArg(2);
+      rnn_word_list = po.GetArg(3);
+      rnnlm_rxfilename = po.GetArg(4);
+      lats_rspecifier = po.GetArg(5);
+      lats_wspecifier = po.GetArg(6);
+    } else {
+      lm_to_subtract_rxfilename = po.GetArg(1),
+      word_symbols_rxfilename = po.GetArg(2);
+      unk_prob_file = po.GetArg(3);
+      rnn_word_list = po.GetArg(4);
+      rnnlm_rxfilename = po.GetArg(5);
+      lats_rspecifier = po.GetArg(6);
+      lats_wspecifier = po.GetArg(7);
+    }
+
+    // for G.fst
+    fst::ScaleDeterministicOnDemandFst *lm_to_subtract_det_scale = NULL;
+    fst::BackoffDeterministicOnDemandFst<StdArc> *lm_to_subtract_det_backoff = NULL;
+    VectorFst<StdArc> *lm_to_subtract_fst = NULL;
+
+    // for G.carpa
+    ConstArpaLm* const_arpa = NULL;
+    fst::DeterministicOnDemandFst<StdArc> *carpa_lm_to_subtract_fst = NULL;
+
+    KALDI_LOG << "Reading old LMs...";
+    if (use_carpa) {
+      const_arpa = new ConstArpaLm();
+      ReadKaldiObject(lm_to_subtract_rxfilename, const_arpa);
+      carpa_lm_to_subtract_fst = new ConstArpaLmDeterministicFst(*const_arpa);
+      lm_to_subtract_det_scale
+          = new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                   carpa_lm_to_subtract_fst);
+    } else {
+      lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
+          lm_to_subtract_rxfilename);
+      lm_to_subtract_det_backoff =
+          new fst::BackoffDeterministicOnDemandFst<StdArc>(*lm_to_subtract_fst);
+      lm_to_subtract_det_scale =
+          new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                 lm_to_subtract_det_backoff);
+    }
+
+    // Reads the TF language model.
+    KaldiTfRnnlmWrapper rnnlm(opts, rnn_word_list, word_symbols_rxfilename,
+                              unk_prob_file, rnnlm_rxfilename);
+
+    // Reads and writes as compact lattice.
+    SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
+
+    int32 n_done = 0, n_fail = 0;
+
+    TfRnnlmDeterministicFst* lm_to_add_orig =
+        new TfRnnlmDeterministicFst(max_ngram_order, &rnnlm);
+
+    for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) {
+      fst::DeterministicOnDemandFst<StdArc> *lm_to_add =
+          new fst::ScaleDeterministicOnDemandFst(lm_scale, lm_to_add_orig);
+
+      std::string key = compact_lattice_reader.Key();
+      CompactLattice clat = compact_lattice_reader.Value();
+      compact_lattice_reader.FreeCurrent();
+
+      // Scale the acoustic scores by "acoustic_scale" before the pruned
+      // composition; the pruning depends on the relative scale of the
+      // acoustic and LM scores.  We undo this scaling on the output
+      // lattice below.
+      if (acoustic_scale != 1.0) {
+        fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &clat);
+      }
+      TopSortCompactLatticeIfNeeded(&clat);
+
+      fst::ComposeDeterministicOnDemandFst<StdArc> combined_lms(
+          lm_to_subtract_det_scale, lm_to_add);
+
+      // Composes lattice with language model.
+      CompactLattice composed_clat;
+      ComposeCompactLatticePruned(compose_opts, clat,
+                                  &combined_lms, &composed_clat);
+      lm_to_add_orig->Clear();
+
+      if (composed_clat.NumStates() == 0) {
+        // Something went wrong. A warning will already have been printed.
+        n_fail++;
+      } else {
+        if (acoustic_scale != 1.0) {
+          if (acoustic_scale == 0.0)
+            KALDI_ERR << "Acoustic scale cannot be zero.";
+          fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale),
+                            &composed_clat);
+        }
+        compact_lattice_writer.Write(key, composed_clat);
+        n_done++;
+      }
+      delete lm_to_add;
+    }
+    delete lm_to_subtract_fst;
+    delete lm_to_add_orig;
+    delete lm_to_subtract_det_backoff;
+    delete lm_to_subtract_det_scale;
+
+    delete const_arpa;
+    delete carpa_lm_to_subtract_fst;
+
+    KALDI_LOG << "Done " << n_done << " lattices, failed for " << n_fail;
+    return (n_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc
index 26ad4ab95ff..178674a3a8e 100644
--- a/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc
+++ b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc
@@ -35,17 +35,18 @@
     const char *usage =
         "Rescores lattice with rnnlm that is trained with TensorFlow.\n"
         "An example script for training and rescoring with the TensorFlow\n"
-        "RNNLM is at egs/ami/s5/local/tfrnnlm/run_lstm.sh\n"
+        "RNNLM is at egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh\n"
         "\n"
         "Usage: lattice-lmrescore-tf-rnnlm [options] [unk-file] <rnn-wordlist> \\\n"
        "             <word-symbol-table> <lattice-rspecifier> \\\n"
        "             <rnnlm-dir> <lattice-wspecifier>\n"
-        " e.g.: lattice-lmrescore-tf-rnnlm --lm-scale=-1.0 unkcounts.txt rnnwords.txt \\\n"
-        "     words.txt ark:in.lats rnnlm ark:out.lats\n";
+        " e.g.: lattice-lmrescore-tf-rnnlm --lm-scale=0.5 \\\n"
+        "      data/tensorflow_lstm/unkcounts.txt data/tensorflow_lstm/rnnwords.txt \\\n"
+        "      data/lang/words.txt ark:in.lats data/tensorflow_lstm/rnnlm ark:out.lats\n";
 
     ParseOptions po(usage);
     int32 max_ngram_order = 3;
-    BaseFloat lm_scale = 1.0;
+    BaseFloat lm_scale = 0.5;
 
     po.Register("lm-scale", &lm_scale, "Scaling factor for language model "
                 "costs");