diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
index 56ad4d043c3..8dd876c2b2c 100755
--- a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
@@ -34,18 +34,25 @@ final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
 if [ $stage -le 3 ]; then
-# for decode_set in dev; do
   for decode_set in dev eval; do
     basedir=exp/$mic/nnet3/tdnn_sp/
     decode_dir=${basedir}/decode_${decode_set}
 
-    # Lattice rescoring
-    steps/lmrescore_rnnlm_lat.sh \
-      --cmd "$tfrnnlm_cmd --mem 16G" \
-      --rnnlm-ver tensorflow --weight $weight --max-ngram-order $ngram_order \
+    # pruned lattice rescoring
+    steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh \
+      --cmd "$tfrnnlm_cmd --mem 4G" \
+      --weight $weight --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.unk.fast.tfrnnlm.lat.${ngram_order}gram.$weight &
+      ${decode_dir}_tfrnnlm_lat_${ngram_order}gram &
+
+    # Lattice rescoring, unpruned (slow) version
+#   steps/tfrnnlm/lmrescore_rnnlm_lat.sh \
+#     --cmd "$tfrnnlm_cmd --mem 4G" \
+#     --weight $weight --max-ngram-order $ngram_order \
+#     data/lang_$LM $dir \
+#     data/$mic/${decode_set}_hires ${decode_dir} \
+#     ${decode_dir}_lat_${ngram_order}gram_unpruned &
   done
 fi

diff --git a/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh b/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh
index fead2a12152..8367029adaa 100755
--- a/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh
+++ b/egs/swbd/s5c/local/rnnlm/tuning/run_lstm_1e.sh
@@ -23,6 +23,7 @@ ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
               # if it's set, it merges histories in the lattice if they share
               # the same ngram history and this prevents the lattice from
               # exploding exponentially
+pruned_rescore=true
 
 . cmd.sh
 . utils/parse_options.sh
@@ -95,12 +96,17 @@ fi
 if [ $stage -le 4 ] && $run_rescore; then
   echo "$0: Perform lattice-rescoring on $ac_model_dir"
-  LM=sw1_fsh_fg
+  LM=sw1_fsh_fg # using the 4-gram const arpa file as old lm
+# LM=sw1_tg # if using the original 3-gram G.fst as old lm
+  pruned=
+  if $pruned_rescore; then
+    pruned=_pruned
+  fi
   for decode_set in eval2000; do
     decode_dir=${ac_model_dir}/decode_${decode_set}_${LM}_looped
 
     # Lattice rescoring
-    rnnlm/lmrescore.sh \
+    rnnlm/lmrescore$pruned.sh \
       --cmd "$decode_cmd --mem 4G" \
       --weight 0.5 --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
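With pruned_rescore=true, the rescoring loop above resolves to one call per decode set along these lines (a sketch; the RNNLM directory exp/rnnlm_lstm_1e and the decode-directory names are illustrative stand-ins for $dir and $decode_dir in the script):

    rnnlm/lmrescore_pruned.sh \
      --cmd "run.pl --mem 4G" \
      --weight 0.5 --max-ngram-order 4 \
      data/lang_sw1_fsh_fg exp/rnnlm_lstm_1e \
      data/eval2000_hires \
      exp/chain/tdnn_lstm_1e_sp/decode_eval2000_sw1_fsh_fg_looped \
      exp/chain/tdnn_lstm_1e_sp/decode_eval2000_sw1_fsh_fg_looped_rnnlm_pruned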
diff --git a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
index f38fb2628c8..1dbcbe1a192 100755
--- a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
+++ b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
@@ -11,12 +11,10 @@
 cmd=run.pl
 skip_scoring=false
 max_ngram_order=4
-N=10
-inv_acwt=12
-weight=1.0 # Interpolation weight for RNNLM.
-# End configuration section.
+acwt=0.1
+weight=0.5 # Interpolation weight for RNNLM.
 rnnlm_ver=
-#layer_string=
+# End configuration section.
 
 echo "$0 $@" # Print the command line for logging
@@ -56,11 +54,6 @@
 if [ "$rnnlm_ver" == "cuedrnnlm" ]; then
   first_arg=$rnnlm_dir/rnn.wlist
 fi
 
-if [ "$rnnlm_ver" == "tensorflow" ]; then
-  rescoring_binary="lattice-lmrescore-tf-rnnlm"
-  first_arg="$rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final"
-fi
-
 oldlm=$oldlang/G.fst
 if [ -f $oldlang/G.carpa ]; then
   oldlm=$oldlang/G.carpa
 elif [ ! -f $oldlm ]; then
   echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
     exit 1;
 fi
 
 [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
-[ ! -f $rnnlm_dir/rnnlm ] && [ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
+[ ! -f $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
 [ ! -f $rnnlm_dir/unk.probs ] &&\
   echo "$0: Missing file $rnnlm_dir/unk.probs" && exit 1;
 [ ! -f $oldlang/words.txt ] &&\
@@ -83,8 +76,6 @@ awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
 
 oldlm_command="fstproject --project_output=true $oldlm |"
 
-acwt=`perl -e "print (1.0/$inv_acwt);"`
-
 mkdir -p $outdir/log
 nj=`cat $indir/num_jobs` || exit 1;
 cp $indir/num_jobs $outdir
@@ -112,7 +103,7 @@
 if ! $skip_scoring ; then
   [ ! -x local/score.sh ] && echo $err_msg && exit 1;
   local/score.sh --cmd "$cmd" $data $oldlang $outdir
 else
-  echo "Not scoring because requested so..."
+  echo "$0: Not scoring because --skip-scoring was specified."
 fi
 
 exit 0;
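A note on the new defaults above: the input lattice already carries the old LM score at scale 1.0, and the rescoring pipeline first subtracts the old LM at scale $weight and then adds the RNNLM at scale $weight, so in log space each path ends up scored as

    graph_score = (1 - weight) * log P_old(W) + weight * log P_rnnlm(W) + (non-LM graph costs)

That is, --weight 0.5 gives an equal-weight interpolation of the two LMs, a safer default than the previous 1.0, which discarded the old LM score entirely.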
diff --git a/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat.sh
new file mode 100644
index 00000000000..b84d9a0eef7
--- /dev/null
+++ b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+# Copyright 2015 Guoguo Chen
+#           2017 Hainan Xu
+# Apache 2.0
+
+# This script rescores lattices with an RNNLM trained with TensorFlow.
+# A faster and more accurate version of the algorithm is in
+# steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh, which is preferred.
+# An example recipe that uses this script is egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+
+# Begin configuration section.
+cmd=run.pl
+skip_scoring=false
+max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order.
+                  # If it is set, histories in the lattice are merged when they share
+                  # the same n-gram history; this prevents the lattice from
+                  # exploding exponentially. Details of the n-gram approximation
+                  # method are described in section 2.3 of the paper
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+weight=0.5 # Interpolation weight for RNNLM.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+. ./utils/parse_options.sh
+
+if [ $# != 5 ]; then
+  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
+  echo "with TensorFlow RNNLM."
+  echo ""
+  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
+  echo "          <data-dir> <input-decode-dir> <output-decode-dir>"
+  echo " e.g.: $0 data/lang_tg data/tensorflow_lstm data/test \\"
+  echo "          exp/tri3/test_tg exp/tri3/test_tfrnnlm"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
+  exit 1;
+fi
+
+[ -f path.sh ] && . ./path.sh;
+
+oldlang=$1
+rnnlm_dir=$2
+data=$3
+indir=$4
+outdir=$5
+
+oldlm=$oldlang/G.fst
+if [ -f $oldlang/G.carpa ]; then
+  oldlm=$oldlang/G.carpa
+elif [ ! -f $oldlm ]; then
+  echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
+    exit 1;
+fi
+
+echo "$0: using $oldlm as old LM"
+
+[ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing tf model folder $rnnlm_dir/rnnlm" && exit 1;
+
+for f in $rnnlm_dir/unk.probs $oldlang/words.txt $indir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: Missing file $f" && exit 1
+done
+
+awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
+  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
+  || exit 1;
+
+oldlm_command="fstproject --project_output=true $oldlm |"
+
+mkdir -p $outdir/log
+nj=`cat $indir/num_jobs` || exit 1;
+cp $indir/num_jobs $outdir
+
+oldlm_weight=`perl -e "print -1.0 * $weight;"`
+if [ "$oldlm" == "$oldlang/G.fst" ]; then
+  $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+    lattice-lmrescore --lm-scale=$oldlm_weight \
+    "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \
+    lattice-lmrescore-tf-rnnlm --lm-scale=$weight \
+    --max-ngram-order=$max_ngram_order \
+    $rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
+    "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+else
+  $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+    lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \
+    "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm" ark:- \| \
+    lattice-lmrescore-tf-rnnlm --lm-scale=$weight \
+    --max-ngram-order=$max_ngram_order \
+    $rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
+    "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+fi
+
+if ! $skip_scoring ; then
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
+  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
+  local/score.sh --cmd "$cmd" $data $oldlang $outdir
+else
+  echo "$0: Not scoring because --skip-scoring was specified."
+fi
+
+exit 0;
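Expanded, each parallel job in the G.fst branch of the script above runs a pipeline like this when invoked directly, i.e. without the queue escaping (a sketch for JOB=1 with weight 0.5; all paths are illustrative):

    lattice-lmrescore --lm-scale=-0.5 \
      "ark:gunzip -c exp/tri3/test_tg/lat.1.gz|" \
      "fstproject --project_output=true data/lang_tg/G.fst |" ark:- | \
    lattice-lmrescore-tf-rnnlm --lm-scale=0.5 --max-ngram-order=4 \
      data/tensorflow_lstm/unk.probs data/tensorflow_lstm/wordlist.rnn.final \
      data/lang_tg/words.txt ark:- data/tensorflow_lstm/rnnlm \
      "ark,t:|gzip -c > exp/tri3/test_tfrnnlm/lat.1.gz"

The first binary removes the old LM scores (note the negative scale); the second adds the weighted TensorFlow-RNNLM scores.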
diff --git a/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh
new file mode 100755
index 00000000000..e098aef85df
--- /dev/null
+++ b/egs/wsj/s5/steps/tfrnnlm/lmrescore_rnnlm_lat_pruned.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+# Copyright 2015 Guoguo Chen
+#           2017 Hainan Xu
+# Apache 2.0
+
+# This script rescores lattices with an RNNLM trained with TensorFlow.
+# It uses a pruned algorithm to speed up the runtime and improve the accuracy;
+# it is an improved version of steps/tfrnnlm/lmrescore_rnnlm_lat.sh and uses
+# the exact same interface. The details of the pruning algorithm are described in
+# http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+# An example recipe that uses this script is egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+
+# Begin configuration section.
+cmd=run.pl
+skip_scoring=false
+max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order.
+                  # If it is set, histories in the lattice are merged when they share
+                  # the same n-gram history; this prevents the lattice from
+                  # exploding exponentially. Details of the n-gram approximation
+                  # method are described in section 2.3 of the paper
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+acwt=0.1
+weight=0.5 # Interpolation weight for RNNLM.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+. ./utils/parse_options.sh
+
+if [ $# != 5 ]; then
+  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
+  echo "with TensorFlow RNNLM."
+  echo ""
+  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
+  echo "          <data-dir> <input-decode-dir> <output-decode-dir>"
+  echo " e.g.: $0 data/lang_tg data/tensorflow_lstm data/test \\"
+  echo "          exp/tri3/test_tg exp/tri3/test_tfrnnlm"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
+  exit 1;
+fi
+
+[ -f path.sh ] && . ./path.sh;
+
+oldlang=$1
+rnnlm_dir=$2
+data=$3
+indir=$4
+outdir=$5
+
+oldlm=$oldlang/G.fst
+carpa_option=
+
+if [ -f $oldlang/G.carpa ]; then
+  oldlm=$oldlang/G.carpa
+  carpa_option="--use-const-arpa=true"
+elif [ ! -f $oldlm ]; then
+  echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
+    exit 1;
+fi
+
+echo "$0: using $oldlm as old LM"
+
+[ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing tf model folder $rnnlm_dir/rnnlm" && exit 1;
+
+for f in $rnnlm_dir/unk.probs $oldlang/words.txt $indir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: Missing file $f" && exit 1
+done
+
+awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
+  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
+  || exit 1;
+
+mkdir -p $outdir/log
+nj=`cat $indir/num_jobs` || exit 1;
+cp $indir/num_jobs $outdir
+
+$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+  lattice-lmrescore-tf-rnnlm-pruned --lm-scale=$weight \
+  --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order \
+  $carpa_option $oldlm $oldlang/words.txt \
+  $rnnlm_dir/unk.probs $rnnlm_dir/wordlist.rnn.final "$rnnlm_dir/rnnlm" \
+  "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+
+if ! $skip_scoring ; then
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
+  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
+  local/score.sh --cmd "$cmd" $data $oldlang $outdir
+else
+  echo "$0: Not scoring because --skip-scoring was specified."
+fi
+
+exit 0;

diff --git a/scripts/rnnlm/lmrescore.sh b/scripts/rnnlm/lmrescore.sh
index 84f42443710..cd0cf793d8d 100755
--- a/scripts/rnnlm/lmrescore.sh
+++ b/scripts/rnnlm/lmrescore.sh
@@ -5,6 +5,9 @@
 # Apache 2.0
 
 # This script rescores lattices with KALDI RNNLM.
+# It uses a simple n-gram approximation to limit the search space;
+# a faster and more accurate way to rescore is rnnlm/lmrescore_pruned.sh,
+# which is preferred.
 
 # Begin configuration section.
 cmd=run.pl
@@ -14,14 +17,14 @@ max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order
                   # if it's set, it merges histories in the lattice if they share
                   # the same ngram history and this prevents the lattice from
                   # exploding exponentially. Details of the n-gram approximation
                   # method are described in section 2.3 of the paper
-                  # http://www.cs.jhu.edu/~hxu/tf.pdf
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
 weight=0.5 # Interpolation weight for RNNLM.
 normalize=false # If true, we add a normalization step to the output of the RNNLM
                 # so that it adds up to *exactly* 1. Note that this is not necessary
                 # as in our RNNLM setup, a properly trained network would automatically
                 # have its normalization term close to 1. The details of this
-                # could be found at http://www.cs.jhu.edu/~hxu/rnnlm.pdf
+                # can be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf
 
 # End configuration section.
@@ -109,11 +112,11 @@
 if ! $skip_scoring ; then
-  err_msg="Not scoring because local/score.sh does not exist or not executable."
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
   [ ! -x local/score.sh ] && echo $err_msg && exit 1;
   local/score.sh --cmd "$cmd" $data $oldlang $outdir
 else
-  echo "Not scoring because requested so..."
+  echo "$0: Not scoring because --skip-scoring was specified."
 fi
 
 exit 0;
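To make the max-ngram-order approximation concrete: as implemented in the rescoring FSTs, each lattice state keeps at most max_ngram_order - 1 words of RNNLM history as its identity. With --max-ngram-order 4, two partial paths ending in "...so he went to the" and "...then they went to the" are collapsed into a single state keyed on "went to the", and the RNNLM is evaluated only once from that point on.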
diff --git a/scripts/rnnlm/lmrescore_pruned.sh b/scripts/rnnlm/lmrescore_pruned.sh
new file mode 100755
index 00000000000..e757508990b
--- /dev/null
+++ b/scripts/rnnlm/lmrescore_pruned.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+# Copyright 2017 Hainan Xu
+# Apache 2.0
+
+# This script rescores lattices with KALDI RNNLM using a pruned algorithm.
+# The details of the algorithm can be found at
+# http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+# One example script for this is at egs/swbd/s5c/local/rnnlm/run_lstm.sh
+
+# Begin configuration section.
+cmd=run.pl
+skip_scoring=false
+max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram-order.
+                  # If it is set, histories in the lattice are merged when they share
+                  # the same n-gram history; this prevents the lattice from
+                  # exploding exponentially. Details of the n-gram approximation
+                  # method are described in section 2.3 of the paper
+                  # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf
+
+acwt=0.1
+weight=0.5 # Interpolation weight for RNNLM.
+normalize=false # If true, we add a normalization step to the output of the RNNLM
+                # so that it adds up to *exactly* 1. Note that this is not necessary
+                # as in our RNNLM setup, a properly trained network would automatically
+                # have its normalization term close to 1. The details of this
+                # can be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf
+
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+. ./utils/parse_options.sh
+
+if [ $# != 5 ]; then
+  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
+  echo "with Kaldi RNNLM using a pruned algorithm. See comments in file for details"
+  echo ""
+  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
+  echo "          <data-dir> <input-decode-dir> <output-decode-dir>"
+  echo " e.g.: $0 data/lang_tg exp/rnnlm_lstm/ data/test \\"
+  echo "          exp/tri3/test_tg exp/tri3/test_rnnlm_4gram"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
+  exit 1;
+fi
+
+[ -f path.sh ] && . ./path.sh;
+
+oldlang=$1
+rnnlm_dir=$2
+data=$3
+indir=$4
+outdir=$5
+
+oldlm=$oldlang/G.fst
+carpa_option=
+if [ ! -f $oldlm ]; then
+  echo "$0: file $oldlm not found; looking for $oldlang/G.carpa"
+  oldlm=$oldlang/G.carpa
+  carpa_option="--use-const-arpa=true"
+fi
+
+[ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
+[ ! -f $rnnlm_dir/final.raw ] && echo "$0: Missing file $rnnlm_dir/final.raw" && exit 1;
+[ ! -f $rnnlm_dir/feat_embedding.final.mat ] && [ ! -f $rnnlm_dir/word_embedding.final.mat ] && echo "$0: Missing word embedding file" && exit 1;
+
+[ ! -f $oldlang/words.txt ] &&\
+  echo "$0: Missing file $oldlang/words.txt" && exit 1;
+! ls $indir/lat.*.gz >/dev/null &&\
+  echo "$0: No lattices in input directory $indir" && exit 1;
+awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
+  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
+  || exit 1;
+
+normalize_opt=
+if $normalize; then
+  normalize_opt="--normalize-probs=true"
+fi
+special_symbol_opts=$(cat $rnnlm_dir/special_symbol_opts.txt)
+
+word_embedding=
+if [ -f $rnnlm_dir/word_embedding.final.mat ]; then
+  word_embedding=$rnnlm_dir/word_embedding.final.mat
+else
+  word_embedding="'rnnlm-get-word-embedding $rnnlm_dir/word_feats.txt $rnnlm_dir/feat_embedding.final.mat -|'"
+fi
+
+mkdir -p $outdir/log
+nj=`cat $indir/num_jobs` || exit 1;
+cp $indir/num_jobs $outdir
+
+$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+  lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=$weight $special_symbol_opts \
+  --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order $normalize_opt \
+  $carpa_option $oldlm $word_embedding "$rnnlm_dir/final.raw" \
+  "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
+
+if ! $skip_scoring ; then
+  err_msg="$0: Not scoring because local/score.sh does not exist or not executable."
+  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
+  echo local/score.sh --cmd "$cmd" $data $oldlang $outdir
+  local/score.sh --cmd "$cmd" $data $oldlang $outdir
+else
+  echo "$0: Not scoring because --skip-scoring was specified."
+fi
+
+exit 0;
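Expanded, a single job of the command above looks like the following when only feat_embedding.final.mat exists (a sketch; the --bos-symbol/--eos-symbol values come from special_symbol_opts.txt and all paths are illustrative):

    lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \
      --bos-symbol=1 --eos-symbol=2 \
      --acoustic-scale=0.1 --max-ngram-order=4 \
      data/lang_tg/G.fst \
      "rnnlm-get-word-embedding exp/rnnlm_lstm/word_feats.txt exp/rnnlm_lstm/feat_embedding.final.mat -|" \
      exp/rnnlm_lstm/final.raw \
      "ark:gunzip -c exp/tri3/test_tg/lat.1.gz|" \
      "ark,t:|gzip -c > exp/tri3/test_rnnlm_4gram/lat.1.gz"

Note how the word embedding matrix can be generated on the fly through a Kaldi rxfilename pipe.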
diff --git a/src/fstext/deterministic-fst-inl.h b/src/fstext/deterministic-fst-inl.h
index bbf8cf0bce1..c6f99697e00 100644
--- a/src/fstext/deterministic-fst-inl.h
+++ b/src/fstext/deterministic-fst-inl.h
@@ -160,7 +160,8 @@ template <class Arc>
 bool ComposeDeterministicOnDemandFst<Arc>::GetArc(StateId s, Label ilabel,
                                                   Arc *oarc) {
   typedef typename MapType::iterator IterType;
-  KALDI_ASSERT(ilabel != 0);
+  KALDI_ASSERT(ilabel != 0 &&
+      "This program expects epsilon-free compact lattices as input");
   KALDI_ASSERT(s < static_cast<StateId>(state_vec_.size()));
   const std::pair<StateId, StateId> pr (state_vec_[s]);

diff --git a/src/fstext/kaldi-fst-io.cc b/src/fstext/kaldi-fst-io.cc
index a863428be6c..cda146104d0 100644
--- a/src/fstext/kaldi-fst-io.cc
+++ b/src/fstext/kaldi-fst-io.cc
@@ -123,4 +123,23 @@ void WriteFstKaldi(const VectorFst<StdArc> &fst,
   fst.Write(ko.Stream(), wopts);
 }
 
+fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename) {
+  // ReadFstKaldi() will die with exception on failure.
+  fst::VectorFst<fst::StdArc> *ans = fst::ReadFstKaldi(rxfilename);
+  if (ans->Properties(fst::kAcceptor, true) == 0) {
+    // If it's not already an acceptor, project on the output, i.e. copy olabels
+    // to ilabels. Generally the G.fst's on disk will have the disambiguation
+    // symbol #0 on the input symbols of the backoff arc, and projection will
+    // replace them with epsilons, which is what is on the output symbols of
+    // those arcs.
+    fst::Project(ans, fst::PROJECT_OUTPUT);
+  }
+  if (ans->Properties(fst::kILabelSorted, true) == 0) {
+    // Make sure LM is sorted on ilabel.
+    fst::ILabelCompare<fst::StdArc> ilabel_comp;
+    fst::ArcSort(ans, ilabel_comp);
+  }
+  return ans;
+}
+
 }  // end namespace fst
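For intuition, ReadAndPrepareLmFst() does to a G.fst roughly what this OpenFst command-line pipeline does (a sketch; newer OpenFst releases spell the projection flag --project_type=output):

    fstproject --project_output=true data/lang_tg/G.fst | \
      fstarcsort --sort_type=ilabel > G_prepared.fst

Projecting onto the output side turns the #0 backoff disambiguation symbol on the input labels into epsilon, and the arc-sort on input labels is what makes the later on-demand composition lookups efficient.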
diff --git a/src/fstext/kaldi-fst-io.h b/src/fstext/kaldi-fst-io.h
index 74f84efeea2..9715d81941e 100644
--- a/src/fstext/kaldi-fst-io.h
+++ b/src/fstext/kaldi-fst-io.h
@@ -81,6 +81,10 @@ template <class Arc>
 void ReadFstKaldi(std::istream &is, bool binary,
                   VectorFst<Arc> *fst);
 
+// Reads an FST file for an LM (G.fst), makes it an acceptor,
+// and makes sure it is sorted on ilabel.
+fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename);
+
 // This is a Holder class with T = VectorFst<Arc>, that meets the requirements
 // of a Holder class as described in ../util/kaldi-holder.h. This enables us to
 // read/write collections of FSTs indexed by strings, using the Table concept (

diff --git a/src/latbin/Makefile b/src/latbin/Makefile
index 7bab32bf25e..bcffbb43168 100644
--- a/src/latbin/Makefile
+++ b/src/latbin/Makefile
@@ -25,7 +25,7 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
            lattice-determinize-phone-pruned-parallel lattice-expand-ngram \
            lattice-lmrescore-const-arpa lattice-lmrescore-rnnlm nbest-to-prons \
            lattice-arc-post lattice-determinize-non-compact lattice-lmrescore-kaldi-rnnlm \
-           lattice-lmrescore-pruned
+           lattice-lmrescore-pruned lattice-lmrescore-kaldi-rnnlm-pruned
 
 OBJFILES =
diff --git a/src/latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc b/src/latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc
new file mode 100644
index 00000000000..73895e7203f
--- /dev/null
+++ b/src/latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc
@@ -0,0 +1,209 @@
+// latbin/lattice-lmrescore-kaldi-rnnlm-pruned.cc
+
+// Copyright 2017 Johns Hopkins University (author: Daniel Povey)
+//           2017 Hainan Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "rnnlm/rnnlm-lattice-rescoring.h"
+#include "lm/const-arpa-lm.h"
+#include "util/common-utils.h"
+#include "nnet3/nnet-utils.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "lat/compose-lattice-pruned.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+    using fst::ReadFstKaldi;
+
+    const char *usage =
+        "Rescores lattice with kaldi-rnnlm. This program is called from \n"
+        "scripts/rnnlm/lmrescore_pruned.sh. An example for rescoring \n"
+        "lattices is at egs/swbd/s5c/local/rnnlm/run_lstm.sh \n"
+        "\n"
+        "Usage: lattice-lmrescore-kaldi-rnnlm-pruned [options] \\\n"
+        "             <lm-to-subtract-rxfilename> <word-embedding-rxfilename> \\\n"
+        "             <raw-rnnlm-rxfilename> <lattice-rspecifier> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \\\n"
+        "      --bos-symbol=1 --eos-symbol=2 \\\n"
+        "      data/lang_test/G.fst word_embedding.mat \\\n"
+        "      final.raw ark:in.lats ark:out.lats\n\n"
+        "       lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \\\n"
+        "      --bos-symbol=1 --eos-symbol=2 --use-const-arpa=true \\\n"
+        "      data/lang_test_fg/G.carpa word_embedding.mat \\\n"
+        "      final.raw ark:in.lats ark:out.lats\n";
+
+    ParseOptions po(usage);
+    rnnlm::RnnlmComputeStateComputationOptions opts;
+    ComposeLatticePrunedOptions compose_opts;
+
+    int32 max_ngram_order = 3;
+    BaseFloat lm_scale = 0.5;
+    BaseFloat acoustic_scale = 0.1;
+    bool use_carpa = false;
+
+    po.Register("lm-scale", &lm_scale, "Scaling factor for <lm-to-add>; its negative "
+                "will be applied to <lm-to-subtract>.");
+    po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic "
+                "probabilities (e.g. 0.1 for non-chain systems); important because "
+                "of its effect on pruning.");
+    po.Register("max-ngram-order", &max_ngram_order,
+                "If positive, allow RNNLM histories longer than this to be identified "
+                "with each other for rescoring purposes (an approximation that "
+                "saves time and reduces output lattice size).");
+    po.Register("use-const-arpa", &use_carpa, "If true, read the old-LM file "
+                "as a const-arpa file as opposed to an FST file");
+
+    opts.Register(&po);
+    compose_opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 5) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    if (opts.bos_index == -1 || opts.eos_index == -1) {
+      KALDI_ERR << "must set --bos-symbol and --eos-symbol options";
+    }
+
+    std::string lm_to_subtract_rxfilename, lats_rspecifier,
+        word_embedding_rxfilename, rnnlm_rxfilename, lats_wspecifier;
+
+    lm_to_subtract_rxfilename = po.GetArg(1),
+    word_embedding_rxfilename = po.GetArg(2);
+    rnnlm_rxfilename = po.GetArg(3);
+    lats_rspecifier = po.GetArg(4);
+    lats_wspecifier = po.GetArg(5);
+
+    // for G.fst
+    fst::ScaleDeterministicOnDemandFst *lm_to_subtract_det_scale = NULL;
+    fst::BackoffDeterministicOnDemandFst<StdArc> *lm_to_subtract_det_backoff = NULL;
+    VectorFst<StdArc> *lm_to_subtract_fst = NULL;
+
+    // for G.carpa
+    ConstArpaLm* const_arpa = NULL;
+    fst::DeterministicOnDemandFst<StdArc> *carpa_lm_to_subtract_fst = NULL;
+
+    KALDI_LOG << "Reading old LMs...";
+    if (use_carpa) {
+      const_arpa = new ConstArpaLm();
+      ReadKaldiObject(lm_to_subtract_rxfilename, const_arpa);
+      carpa_lm_to_subtract_fst = new ConstArpaLmDeterministicFst(*const_arpa);
+      lm_to_subtract_det_scale
+          = new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                   carpa_lm_to_subtract_fst);
+    } else {
+      lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
+          lm_to_subtract_rxfilename);
+      lm_to_subtract_det_backoff =
+          new fst::BackoffDeterministicOnDemandFst<StdArc>(*lm_to_subtract_fst);
+      lm_to_subtract_det_scale =
+          new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                 lm_to_subtract_det_backoff);
+    }
+
+    kaldi::nnet3::Nnet rnnlm;
+    ReadKaldiObject(rnnlm_rxfilename, &rnnlm);
+
+    KALDI_ASSERT(IsSimpleNnet(rnnlm));
+
+    CuMatrix<BaseFloat> word_embedding_mat;
+    ReadKaldiObject(word_embedding_rxfilename, &word_embedding_mat);
+
+    const rnnlm::RnnlmComputeStateInfo info(opts, rnnlm, word_embedding_mat);
+
+    // Reads and writes as compact lattice.
+    SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
+
+    int32 num_done = 0, num_err = 0;
+
+    rnnlm::KaldiRnnlmDeterministicFst* lm_to_add_orig =
+        new rnnlm::KaldiRnnlmDeterministicFst(max_ngram_order, info);
+
+    for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) {
+      fst::DeterministicOnDemandFst<StdArc> *lm_to_add =
+          new fst::ScaleDeterministicOnDemandFst(lm_scale, lm_to_add_orig);
+
+      std::string key = compact_lattice_reader.Key();
+      CompactLattice clat = compact_lattice_reader.Value();
+      compact_lattice_reader.FreeCurrent();
+
+      // Scale the acoustic scores by "acoustic_scale" before the pruned
+      // composition; the pruning depends on the relative scale of the
+      // acoustic and LM scores.  We undo this scaling on the output
+      // lattice below.
+      if (acoustic_scale != 1.0) {
+        fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &clat);
+      }
+      TopSortCompactLatticeIfNeeded(&clat);
+
+      fst::ComposeDeterministicOnDemandFst<StdArc> combined_lms(
+          lm_to_subtract_det_scale, lm_to_add);
+
+      // Composes lattice with language model.
+      CompactLattice composed_clat;
+      ComposeCompactLatticePruned(compose_opts, clat,
+                                  &combined_lms, &composed_clat);
+
+      lm_to_add_orig->Clear();
+
+      if (composed_clat.NumStates() == 0) {
+        // Something went wrong. A warning will already have been printed.
+        num_err++;
+      } else {
+        if (acoustic_scale != 1.0) {
+          if (acoustic_scale == 0.0)
+            KALDI_ERR << "Acoustic scale cannot be zero.";
+          fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale),
+                            &composed_clat);
+        }
+        compact_lattice_writer.Write(key, composed_clat);
+        num_done++;
+      }
+      delete lm_to_add;
+    }
+
+    delete lm_to_subtract_fst;
+    delete lm_to_add_orig;
+    delete lm_to_subtract_det_backoff;
+    delete lm_to_subtract_det_scale;
+
+    delete const_arpa;
+    delete carpa_lm_to_subtract_fst;
+
+    KALDI_LOG << "Overall, succeeded for " << num_done
+              << " lattices, failed for " << num_err;
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
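The pruned composition has its own tuning knobs exposed through ComposeLatticePrunedOptions; they can be passed straight through the binary if speed or output size is a concern. A sketch (the option names here are assumptions taken from how lat/compose-lattice-pruned.h registers them; verify against the binary's --help):

    lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=0.5 \
      --bos-symbol=1 --eos-symbol=2 \
      --lattice-compose-beam=4.0 --max-arcs=60000 \
      data/lang_test/G.fst word_embedding.mat final.raw \
      ark:in.lats ark:out.lats

A smaller compose beam or arc limit trades some rescoring accuracy for speed and smaller output lattices.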
diff --git a/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc b/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
index 22f713b5620..fc1034ac5c7 100644
--- a/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
+++ b/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
@@ -36,8 +36,8 @@
     const char *usage =
         "Rescores lattice with kaldi-rnnlm. This script is called from \n"
-        "scripts/rnnlm/lmrescore_rnnlm_lat.sh. An example for rescoring \n"
-        "lattices is at egs/swbd/s5/local/rnnlm/run_rescoring.sh \n"
+        "scripts/rnnlm/lmrescore.sh. An example for rescoring \n"
+        "lattices is at egs/swbd/s5c/local/rnnlm/run_lstm.sh \n"
         "\n"
         "Usage: lattice-lmrescore-kaldi-rnnlm [options] \\\n"
         " \\\n"

diff --git a/src/latbin/lattice-lmrescore-pruned.cc b/src/latbin/lattice-lmrescore-pruned.cc
index 567f70a5129..3f4347ee709 100644
--- a/src/latbin/lattice-lmrescore-pruned.cc
+++ b/src/latbin/lattice-lmrescore-pruned.cc
@@ -27,31 +27,6 @@
 #include "lat/lattice-functions.h"
 #include "lat/compose-lattice-pruned.h"
 
-namespace kaldi {
-
-fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename) {
-  // ReadFstKaldi() will die with exception on failure.
-  fst::VectorFst<fst::StdArc> *ans = fst::ReadFstKaldi(rxfilename);
-  if (ans->Properties(fst::kAcceptor, true) == 0) {
-    // If it's not already an acceptor, project on the output, i.e. copy olabels
-    // to ilabels. Generally the G.fst's on disk will have the disambiguation
-    // symbol #0 on the input symbols of the backoff arc, and projection will
-    // replace them with epsilons which is what is on the output symbols of
-    // those arcs.
-    fst::Project(ans, fst::PROJECT_OUTPUT);
-  }
-  if (ans->Properties(fst::kILabelSorted, true) == 0) {
-    // Make sure LM is sorted on ilabel.
-    fst::ILabelCompare<fst::StdArc> ilabel_comp;
-    fst::ArcSort(ans, ilabel_comp);
-  }
-  return ans;
-}
-
-}  // namespace kaldi
-
-
 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
@@ -61,7 +36,6 @@
     using fst::VectorFst;
     using fst::StdArc;
     using fst::ReadFstKaldi;
-    using std::unique_ptr;
 
     const char *usage =
         "This program can be used to subtract scores from one language model and\n"
@@ -110,14 +84,14 @@
     KALDI_LOG << "Reading LMs...";
-    VectorFst<StdArc> *lm_to_subtract_fst = ReadAndPrepareLmFst(
+    VectorFst<StdArc> *lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
         lm_to_subtract_rxfilename);
     VectorFst<StdArc> *lm_to_add_fst = NULL;
     ConstArpaLm const_arpa;
     if (add_const_arpa) {
       ReadKaldiObject(lm_to_add_rxfilename, &const_arpa);
     } else {
-      lm_to_add_fst = ReadAndPrepareLmFst(lm_to_add_rxfilename);
+      lm_to_add_fst = fst::ReadAndPrepareLmFst(lm_to_add_rxfilename);
     }
     fst::BackoffDeterministicOnDemandFst<StdArc> lm_to_subtract_det_backoff(
         *lm_to_subtract_fst);
diff --git a/src/tfrnnlm/tensorflow-rnnlm.cc b/src/tfrnnlm/tensorflow-rnnlm.cc
index f4bb8d8941b..4842d3fbaa8 100644
--- a/src/tfrnnlm/tensorflow-rnnlm.cc
+++ b/src/tfrnnlm/tensorflow-rnnlm.cc
@@ -307,6 +307,24 @@ TfRnnlmDeterministicFst::~TfRnnlmDeterministicFst() {
   }
 }
 
+void TfRnnlmDeterministicFst::Clear() {
+  // Similar to the destructor, but we retain the 0-th entries in each map,
+  // which correspond to the <s> state.
+  for (int i = 1; i < state_to_context_.size(); i++) {
+    delete state_to_context_[i];
+  }
+  for (int i = 1; i < state_to_cell_.size(); i++) {
+    delete state_to_cell_[i];
+  }
+
+  state_to_context_.resize(1);
+  state_to_cell_.resize(1);
+  state_to_wseq_.resize(1);
+  wseq_to_state_.clear();
+  wseq_to_state_[state_to_wseq_[0]] = 0;
+}
+
+
 fst::StdArc::Weight TfRnnlmDeterministicFst::Final(StateId s) {
   // At this point, we should have created the state.
   KALDI_ASSERT(static_cast<size_t>(s) < state_to_wseq_.size());

diff --git a/src/tfrnnlm/tensorflow-rnnlm.h b/src/tfrnnlm/tensorflow-rnnlm.h
index 33ac75fa093..4c15229fe9d 100644
--- a/src/tfrnnlm/tensorflow-rnnlm.h
+++ b/src/tfrnnlm/tensorflow-rnnlm.h
@@ -149,6 +149,7 @@ class TfRnnlmDeterministicFst:
   // Does not take ownership.
   TfRnnlmDeterministicFst(int32 max_ngram_order, KaldiTfRnnlmWrapper *rnnlm);
   ~TfRnnlmDeterministicFst();
+  void Clear();
 
   // We cannot use "const" because the pure virtual function in the interface is
   // not const.
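Clear() exists so that the pruned rescoring binaries can keep a single TfRnnlmDeterministicFst alive across the whole lattice archive: after each lattice is composed they call lm_to_add_orig->Clear() (see lattice-lmrescore-tf-rnnlm-pruned.cc below), which frees all cached TensorFlow context/cell entries except the initial <s> state. This keeps memory bounded per utterance instead of growing with the number of lattices processed, while avoiding the cost of rebuilding the wrapper and its TF session for every utterance.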
diff --git a/src/tfrnnlmbin/Makefile b/src/tfrnnlmbin/Makefile
index 2fb6014b036..f2a353c918c 100644
--- a/src/tfrnnlmbin/Makefile
+++ b/src/tfrnnlmbin/Makefile
@@ -21,7 +21,7 @@ EXTRA_CXXFLAGS = -Wno-sign-compare -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf_archive/src \
                  -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/protobuf/src
 
 include ../kaldi.mk
 
-BINFILES = lattice-lmrescore-tf-rnnlm
+BINFILES = lattice-lmrescore-tf-rnnlm lattice-lmrescore-tf-rnnlm-pruned
 
 OBJFILES =
diff --git a/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc
new file mode 100644
index 00000000000..b707ca85977
--- /dev/null
+++ b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc
@@ -0,0 +1,201 @@
+// tfrnnlmbin/lattice-lmrescore-tf-rnnlm-pruned.cc
+
+// Copyright (C) 2017 Intellisist, Inc. (Author: Hainan Xu)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "tfrnnlm/tensorflow-rnnlm.h"
+#include "util/common-utils.h"
+#include "lm/const-arpa-lm.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "lat/compose-lattice-pruned.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::tf_rnnlm;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+    using fst::ReadFstKaldi;
+
+    const char *usage =
+        "Rescores lattice with rnnlm that is trained with TensorFlow.\n"
+        "An example script for training and rescoring with the TensorFlow\n"
+        "RNNLM is at egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh\n"
+        "\n"
+        "Usage: lattice-lmrescore-tf-rnnlm-pruned [options] \\\n"
+        "             <old-lm-rxfilename> <word-symbol-table> [unk-prob-file] \\\n"
+        "             <rnn-wordlist> <rnnlm-dir> <lattice-rspecifier> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-tf-rnnlm-pruned --lm-scale=0.5 \\\n"
+        "      data/test/G.fst data/lang/words.txt data/tensorflow_lstm/unkcounts.txt \\\n"
+        "      data/tensorflow_lstm/rnnwords.txt \\\n"
+        "      data/tensorflow_lstm/rnnlm ark:in.lats ark:out.lats\n\n"
+        " e.g.: lattice-lmrescore-tf-rnnlm-pruned --lm-scale=0.5 --use-const-arpa=true \\\n"
+        "      data/test_fg/G.carpa data/lang/words.txt data/tensorflow_lstm/unkcounts.txt \\\n"
+        "      data/tensorflow_lstm/rnnwords.txt \\\n"
+        "      data/tensorflow_lstm/rnnlm ark:in.lats ark:out.lats\n";
+
+    ParseOptions po(usage);
+    int32 max_ngram_order = 3;
+    BaseFloat lm_scale = 0.5;
+    BaseFloat acoustic_scale = 0.1;
+    bool use_carpa = false;
+
+    po.Register("lm-scale", &lm_scale, "Scaling factor for <lm-to-add>; its negative "
+                "will be applied to <lm-to-subtract>.");
+    po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic "
+                "probabilities (e.g. 0.1 for non-chain systems); important because "
+                "of its effect on pruning.");
+    po.Register("max-ngram-order", &max_ngram_order,
+                "If positive, allow RNNLM histories longer than this to be identified "
+                "with each other for rescoring purposes (an approximation that "
+                "saves time and reduces output lattice size).");
+    po.Register("use-const-arpa", &use_carpa, "If true, read the old-LM file "
+                "as a const-arpa file as opposed to an FST file");
+
+    KaldiTfRnnlmWrapperOpts opts;
+    ComposeLatticePrunedOptions compose_opts;
+    opts.Register(&po);
+    compose_opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 7 && po.NumArgs() != 6) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string lm_to_subtract_rxfilename, lats_rspecifier, rnn_word_list,
+        word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier, unk_prob_file;
+    if (po.NumArgs() == 6) {
+      lm_to_subtract_rxfilename = po.GetArg(1),
+      word_symbols_rxfilename = po.GetArg(2);
+      rnn_word_list = po.GetArg(3);
+      rnnlm_rxfilename = po.GetArg(4);
+      lats_rspecifier = po.GetArg(5);
+      lats_wspecifier = po.GetArg(6);
+    } else {
+      lm_to_subtract_rxfilename = po.GetArg(1),
+      word_symbols_rxfilename = po.GetArg(2);
+      unk_prob_file = po.GetArg(3);
+      rnn_word_list = po.GetArg(4);
+      rnnlm_rxfilename = po.GetArg(5);
+      lats_rspecifier = po.GetArg(6);
+      lats_wspecifier = po.GetArg(7);
+    }
+
+    // for G.fst
+    fst::ScaleDeterministicOnDemandFst *lm_to_subtract_det_scale = NULL;
+    fst::BackoffDeterministicOnDemandFst<StdArc> *lm_to_subtract_det_backoff = NULL;
+    VectorFst<StdArc> *lm_to_subtract_fst = NULL;
+
+    // for G.carpa
+    ConstArpaLm* const_arpa = NULL;
+    fst::DeterministicOnDemandFst<StdArc> *carpa_lm_to_subtract_fst = NULL;
+
+    KALDI_LOG << "Reading old LMs...";
+    if (use_carpa) {
+      const_arpa = new ConstArpaLm();
+      ReadKaldiObject(lm_to_subtract_rxfilename, const_arpa);
+      carpa_lm_to_subtract_fst = new ConstArpaLmDeterministicFst(*const_arpa);
+      lm_to_subtract_det_scale
+          = new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                   carpa_lm_to_subtract_fst);
+    } else {
+      lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
+          lm_to_subtract_rxfilename);
+      lm_to_subtract_det_backoff =
+          new fst::BackoffDeterministicOnDemandFst<StdArc>(*lm_to_subtract_fst);
+      lm_to_subtract_det_scale =
+          new fst::ScaleDeterministicOnDemandFst(-lm_scale,
+                                                 lm_to_subtract_det_backoff);
+    }
+
+    // Reads the TF language model.
+    KaldiTfRnnlmWrapper rnnlm(opts, rnn_word_list, word_symbols_rxfilename,
+                              unk_prob_file, rnnlm_rxfilename);
+
+    // Reads and writes as compact lattice.
+    SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
+
+    int32 n_done = 0, n_fail = 0;
+
+    TfRnnlmDeterministicFst* lm_to_add_orig =
+        new TfRnnlmDeterministicFst(max_ngram_order, &rnnlm);
+
+    for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) {
+      fst::DeterministicOnDemandFst<StdArc> *lm_to_add =
+          new fst::ScaleDeterministicOnDemandFst(lm_scale, lm_to_add_orig);
+
+      std::string key = compact_lattice_reader.Key();
+      CompactLattice clat = compact_lattice_reader.Value();
+      compact_lattice_reader.FreeCurrent();
+
+      // Scale the acoustic scores by "acoustic_scale" before the pruned
+      // composition; the pruning depends on the relative scale of the
+      // acoustic and LM scores.  We undo this scaling on the output
+      // lattice below.
+      if (acoustic_scale != 1.0) {
+        fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &clat);
+      }
+      TopSortCompactLatticeIfNeeded(&clat);
+
+      fst::ComposeDeterministicOnDemandFst<StdArc> combined_lms(
+          lm_to_subtract_det_scale, lm_to_add);
+
+      // Composes lattice with language model.
+      CompactLattice composed_clat;
+      ComposeCompactLatticePruned(compose_opts, clat,
+                                  &combined_lms, &composed_clat);
+      lm_to_add_orig->Clear();
+
+      if (composed_clat.NumStates() == 0) {
+        // Something went wrong. A warning will already have been printed.
+        n_fail++;
+      } else {
+        if (acoustic_scale != 1.0) {
+          if (acoustic_scale == 0.0)
+            KALDI_ERR << "Acoustic scale cannot be zero.";
+          fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale),
+                            &composed_clat);
+        }
+        compact_lattice_writer.Write(key, composed_clat);
+        n_done++;
+      }
+      delete lm_to_add;
+    }
+    delete lm_to_subtract_fst;
+    delete lm_to_add_orig;
+    delete lm_to_subtract_det_backoff;
+    delete lm_to_subtract_det_scale;
+
+    delete const_arpa;
+    delete carpa_lm_to_subtract_fst;
+
+    KALDI_LOG << "Done " << n_done << " lattices, failed for " << n_fail;
+    return (n_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc
index 26ad4ab95ff..178674a3a8e 100644
--- a/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc
+++ b/src/tfrnnlmbin/lattice-lmrescore-tf-rnnlm.cc
@@ -35,17 +35,18 @@
     const char *usage =
         "Rescores lattice with rnnlm that is trained with TensorFlow.\n"
         "An example script for training and rescoring with the TensorFlow\n"
-        "RNNLM is at egs/ami/s5/local/tfrnnlm/run_lstm.sh\n"
+        "RNNLM is at egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh\n"
         "\n"
         "Usage: lattice-lmrescore-tf-rnnlm [options] [unk-file] <rnn-wordlist> \\\n"
        "             <word-symbol-table> <lattice-rspecifier> \\\n"
        "             <rnnlm-dir> <lattice-wspecifier>\n"
-        " e.g.: lattice-lmrescore-tf-rnnlm --lm-scale=-1.0 unkcounts.txt rnnwords.txt \\\n"
-        "     words.txt ark:in.lats rnnlm ark:out.lats\n";
+        " e.g.: lattice-lmrescore-tf-rnnlm --lm-scale=0.5 \\\n"
+        "      data/tensorflow_lstm/unkcounts.txt data/tensorflow_lstm/rnnwords.txt \\\n"
+        "      data/lang/words.txt ark:in.lats data/tensorflow_lstm/rnnlm ark:out.lats\n";
 
     ParseOptions po(usage);
     int32 max_ngram_order = 3;
-    BaseFloat lm_scale = 1.0;
+    BaseFloat lm_scale = 0.5;
 
     po.Register("lm-scale", &lm_scale, "Scaling factor for language model "
                 "costs");