From 0d839b0bf66ba09b930328b2fb313343c4d8b18e Mon Sep 17 00:00:00 2001
From: Hainan Xu
Date: Mon, 18 Sep 2017 16:32:33 -0400
Subject: [PATCH 01/23] draft

---
 src/latbin/lattice-lmrescore-nnet3-rnnlm.cc    | 147 ++++++++++++++
 src/rnnlm/Makefile                             |   2 +-
 .../kaldi-rnnlm-decodable-simple-looped.cc     | 182 +++++++++++++++++
 .../kaldi-rnnlm-decodable-simple-looped.h      | 187 ++++++++++++++++++
 4 files changed, 517 insertions(+), 1 deletion(-)
 create mode 100644 src/latbin/lattice-lmrescore-nnet3-rnnlm.cc
 create mode 100644 src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc
 create mode 100644 src/rnnlm/kaldi-rnnlm-decodable-simple-looped.h

diff --git a/src/latbin/lattice-lmrescore-nnet3-rnnlm.cc b/src/latbin/lattice-lmrescore-nnet3-rnnlm.cc
new file mode 100644
index 00000000000..26754d26629
--- /dev/null
+++ b/src/latbin/lattice-lmrescore-nnet3-rnnlm.cc
@@ -0,0 +1,147 @@
+// latbin/lattice-lmrescore-nnet3-rnnlm.cc
+
+// Copyright 2017  Johns Hopkins University (author: Daniel Povey)
+//                 Yiming Wang
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "rnnlm/kaldi-rnnlm-rescoring.h"
+#include "util/common-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+
+    const char *usage =
+        "Rescores a lattice with an RNNLM.  The LM is wrapped in the\n"
+        "DeterministicOnDemandFst interface and the rescoring is done by\n"
+        "composing with the wrapped LM using a special type of composition\n"
+        "algorithm.  Determinization is applied to the composed lattice.\n"
+        "\n"
+        "Usage: lattice-lmrescore-nnet3-rnnlm [options] \\\n"
+        "             <rnn-wordlist> <word-symbol-table> \\\n"
+        "             <lattice-rspecifier> <rnnlm-rxfilename> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-nnet3-rnnlm --lm-scale=-1.0 words.txt \\\n"
+        "              ark:in.lats rnnlm ark:out.lats\n";
+
+    ParseOptions po(usage);
+    int32 max_ngram_order = 3;
+    BaseFloat lm_scale = 1.0;
+
+    po.Register("lm-scale", &lm_scale, "Scaling factor for language model "
+                "costs; frequently 1.0 or -1.0");
+    po.Register("max-ngram-order", &max_ngram_order, "If positive, limit the "
+                "rnnlm context to the given number; -1 means we are not going "
+                "to limit it.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 4 && po.NumArgs() != 5) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string lats_rspecifier, rnn_wordlist,
+        word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier;
+    KALDI_ASSERT(po.NumArgs() == 5);
+
+    rnn_wordlist = po.GetArg(1);
+    word_symbols_rxfilename = po.GetArg(2);
+    lats_rspecifier = po.GetArg(3);
+    rnnlm_rxfilename = po.GetArg(4);
+    lats_wspecifier = po.GetArg(5);
+
+    // Reads the language model.
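+    // The RNNLM is a plain nnet3 Nnet read directly from disk; it must pass
+    // the IsSimpleNnet() check below so that the nnet3 looped-computation
+    // code can be reused for it.  The word embedding matrix is read
+    // separately, with one row per word.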
+    kaldi::nnet3::Nnet rnnlm;
+    ReadKaldiObject(rnnlm_rxfilename, &rnnlm);
+
+    if (!IsSimpleNnet(rnnlm))
+      KALDI_ERR << "Input RNNLM in " << rnnlm_rxfilename
+                << " is not the type of neural net we were looking for; "
+                   "failed IsSimpleNnet().";
+
+    CuMatrix<BaseFloat> word_embedding_mat;
+    ReadKaldiObject(word_embedding_rxfilename, &word_embedding_mat);
+
+    const nnet3::DecodableRnnlmSimpleLoopedComputationOptions opts;
+    const nnet3::DecodableRnnlmSimpleLoopedInfo info(opts, rnnlm,
+                                                     word_embedding_mat);
+
+    // Reads and writes as compact lattice.
+    SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
+
+    int32 n_done = 0, n_fail = 0;
+    for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) {
+      std::string key = compact_lattice_reader.Key();
+      CompactLattice clat = compact_lattice_reader.Value();
+      compact_lattice_reader.FreeCurrent();
+
+      if (lm_scale != 0.0) {
+        // Before composing with the LM FST, we scale the lattice weights
+        // by the inverse of "lm_scale".  We'll later scale by "lm_scale".
+        // We do it this way so we can determinize, and it will give the
+        // right effect (taking the "best path" through the LM) regardless
+        // of the sign of lm_scale.
+        fst::ScaleLattice(fst::GraphLatticeScale(1.0 / lm_scale), &clat);
+        ArcSort(&clat, fst::OLabelCompare<CompactLatticeArc>());
+
+        // Wraps the RNNLM into an FST.  We re-create it for each lattice to
+        // prevent memory usage increasing with time.
+        nnet3::KaldiRnnlmDeterministicFst rnnlm_fst(max_ngram_order,
+                                                    rnn_wordlist,
+                                                    word_symbols_rxfilename,
+                                                    info);
+
+        // Composes lattice with language model.
+        CompactLattice composed_clat;
+        ComposeCompactLatticeDeterministic(clat, &rnnlm_fst, &composed_clat);
+
+        // Determinizes the composed lattice.
+        Lattice composed_lat;
+        ConvertLattice(composed_clat, &composed_lat);
+        Invert(&composed_lat);
+        CompactLattice determinized_clat;
+        DeterminizeLattice(composed_lat, &determinized_clat);
+        fst::ScaleLattice(fst::GraphLatticeScale(lm_scale), &determinized_clat);
+        if (determinized_clat.Start() == fst::kNoStateId) {
+          KALDI_WARN << "Empty lattice for utterance " << key
+                     << " (incompatible LM?)";
+          n_fail++;
+        } else {
+          compact_lattice_writer.Write(key, determinized_clat);
+          n_done++;
+        }
+      } else {
+        // Zero scale, so nothing to do.
+        n_done++;
+        compact_lattice_writer.Write(key, clat);
+      }
+    }
+
+    KALDI_LOG << "Done " << n_done << " lattices, failed for " << n_fail;
+    return (n_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/rnnlm/Makefile b/src/rnnlm/Makefile
index ac1ca92f8b3..1e57d2f77ad 100644
--- a/src/rnnlm/Makefile
+++ b/src/rnnlm/Makefile
@@ -10,7 +10,7 @@ TESTFILES = sampler-test sampling-lm-test rnnlm-example-test
 OBJFILES = sampler.o rnnlm-example.o rnnlm-example-utils.o \
            rnnlm-core-training.o rnnlm-embedding-training.o rnnlm-core-compute.o \
            rnnlm-utils.o rnnlm-training.o rnnlm-test-utils.o sampling-lm-estimate.o \
-           sampling-lm.o
+           sampling-lm.o kaldi-rnnlm-decodable-simple-looped.o
 
 LIBNAME = kaldi-rnnlm
 
diff --git a/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc b/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc
new file mode 100644
index 00000000000..ba298e417d3
--- /dev/null
+++ b/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc
@@ -0,0 +1,182 @@
+// rnnlm/kaldi-rnnlm-decodable-simple-looped.cc
+
+// Copyright 2017  Johns Hopkins University (author: Daniel Povey)
+//                 2017  Yiming Wang
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "rnnlm/kaldi-rnnlm-decodable-simple-looped.h"
+#include "nnet3/nnet-utils.h"
+#include "nnet3/nnet-compile-looped.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+
+DecodableRnnlmSimpleLoopedInfo::DecodableRnnlmSimpleLoopedInfo(
+    const DecodableRnnlmSimpleLoopedComputationOptions &opts,
+    const kaldi::nnet3::Nnet &rnnlm,
+    const CuMatrix<BaseFloat> &word_embedding_mat):
+    opts(opts), rnnlm(rnnlm), word_embedding_mat(word_embedding_mat) {
+  Init(opts, rnnlm, word_embedding_mat);
+}
+
+void DecodableRnnlmSimpleLoopedInfo::Init(
+    const DecodableRnnlmSimpleLoopedComputationOptions &opts,
+    const kaldi::nnet3::Nnet &rnnlm,
+    const CuMatrix<BaseFloat> &word_embedding_mat) {
+  opts.Check();
+  KALDI_ASSERT(IsSimpleNnet(rnnlm));
+  int32 left_context, right_context;
+  ComputeSimpleNnetContext(rnnlm, &left_context, &right_context);
+  frames_left_context = opts.extra_left_context_initial + left_context;
+  frames_right_context = right_context;
+  int32 frame_subsampling_factor = 1;
+  frames_per_chunk = GetChunkSize(rnnlm, frame_subsampling_factor,
+                                  opts.frames_per_chunk);
+  KALDI_ASSERT(frames_per_chunk == opts.frames_per_chunk);
+  nnet_output_dim = rnnlm.OutputDim("output");
+  KALDI_ASSERT(nnet_output_dim > 0);
+
+  int32 ivector_period = frames_per_chunk;
+  int32 extra_right_context = 0;
+  int32 num_sequences = 1;  // we're processing one utterance at a time.
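+  // The three requests below describe the first, second and third chunks of
+  // input; CompileLooped() then extrapolates the pattern between the last two
+  // of them into a single 'looped' computation that can be run for an
+  // unbounded number of chunks without re-compilation.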
+  CreateLoopedComputationRequestSimple(rnnlm, frames_per_chunk,
+                                       frame_subsampling_factor,
+                                       ivector_period,
+                                       opts.extra_left_context_initial,
+                                       extra_right_context,
+                                       num_sequences,
+                                       &request1, &request2, &request3);
+
+  CompileLooped(rnnlm, opts.optimize_config, request1, request2,
+                request3, &computation);
+  computation.ComputeCudaIndexes();
+  if (GetVerboseLevel() >= 3) {
+    KALDI_VLOG(3) << "Computation is:";
+    computation.Print(std::cerr, rnnlm);
+  }
+}
+
+DecodableRnnlmSimpleLooped::DecodableRnnlmSimpleLooped(
+    const DecodableRnnlmSimpleLoopedInfo &info) :
+    info_(info),
+    computer_(info_.opts.compute_config, info_.computation,
+              info_.rnnlm, NULL),  // NULL is 'nnet_to_update'
+    // since every time we provide one chunk to the decodable object, the size
+    // of feats_ == frames_per_chunk
+    feats_(info_.frames_per_chunk,
+           info_.word_embedding_mat.NumRows()),  // or Cols()? TODO(hxu)
+    current_log_post_offset_(-1)
+{
+  num_frames_ = feats_.NumRows();
+}
+
+void DecodableRnnlmSimpleLooped::TakeFeatures(
+    const std::vector<int32> &word_indexes) {
+  KALDI_ASSERT(word_indexes.size() == num_frames_);
+  std::vector<std::vector<std::pair<MatrixIndexT, BaseFloat> > >
+      pairs(word_indexes.size());
+  for (int32 i = 0; i < word_indexes.size(); i++) {
+    std::pair<MatrixIndexT, BaseFloat> one_hot_index(word_indexes[i], 1.0);
+    std::vector<std::pair<MatrixIndexT, BaseFloat> > row(1, one_hot_index);
+    pairs[i] = row;
+  }
+  SparseMatrix<BaseFloat> feats_temp(feats_.NumCols(), pairs);
+  feats_.Swap(&feats_temp);
+  // resets the offset so that AdvanceChunk() will be called in GetOutput() and
+  // GetNnetOutputForFrame() after taking new features
+  current_log_post_offset_ = -1;
+}
+
+void DecodableRnnlmSimpleLooped::GetNnetOutputForFrame(
+    int32 frame, VectorBase<BaseFloat> *output) {
+  KALDI_ASSERT(frame >= 0 && frame < feats_.NumRows());
+  if (frame >= current_log_post_offset_ + current_nnet_output_.NumRows())
+    AdvanceChunk();
+  output->CopyFromVec(current_nnet_output_.Row(frame -
+                                               current_log_post_offset_));
+}
+
+BaseFloat DecodableRnnlmSimpleLooped::GetOutput(int32 frame, int32 word_index) {
+  KALDI_ASSERT(frame >= 0 && frame < feats_.NumRows());
+  if (frame >= current_log_post_offset_ + current_nnet_output_.NumRows())
+    AdvanceChunk();
+
+//  int32 embedding_dim = info_.word_embedding_mat.NumCols();
+//  int32 num_words = info_.word_embedding_mat.NumRows();
+
+  const CuMatrix<BaseFloat> &word_embedding_mat = info_.word_embedding_mat;
+
+  CuMatrix<BaseFloat> current_nnet_output_gpu;
+  current_nnet_output_gpu.Swap(&current_nnet_output_);
+  const CuSubVector<BaseFloat> hidden(current_nnet_output_gpu,
+                                      frame - current_log_post_offset_);
+  BaseFloat log_prob =
+      VecVec(hidden, word_embedding_mat.Row(word_index));
+//      output_layer->ComputeLogprobOfWordGivenHistory(hidden, word_index);
+  // swap the pointer back so that this function can be called multiple times
+  // with the same returned value before taking the next new feats
+  current_nnet_output_.Swap(&current_nnet_output_gpu);
+  return log_prob;
+}
+
+void DecodableRnnlmSimpleLooped::AdvanceChunk() {
+  int32 begin_input_frame, end_input_frame;
+  begin_input_frame = -info_.frames_left_context;
+  // note: end is last plus one.
+  end_input_frame = info_.frames_per_chunk + info_.frames_right_context;
+  // currently there is no left/right context and frames_per_chunk == 1
+  KALDI_ASSERT(begin_input_frame == 0 && end_input_frame == 1);
+
+  SparseMatrix<BaseFloat> feats_chunk(end_input_frame - begin_input_frame,
+                                      feats_.NumCols());
+  int32 num_features = feats_.NumRows();
+  for (int32 r = begin_input_frame; r < end_input_frame; r++) {
+    int32 input_frame = r;
+    if (input_frame < 0) input_frame = 0;
+    if (input_frame >= num_features) input_frame = num_features - 1;
+    feats_chunk.SetRow(r - begin_input_frame, feats_.Row(input_frame));
+  }
+
+//  const rnnlm::LmInputComponent* input_layer = info_.lm_nnet.InputLayer();
+//  CuMatrix<BaseFloat> new_input(feats_chunk.NumRows(), input_layer->OutputDim());
+//  input_layer->Propagate(feats_chunk, &new_input);
+
+  CuMatrix<BaseFloat> input_embeddings(1, info_.word_embedding_mat.NumRows(),
+                                       kUndefined);
+  input_embeddings.Row(0).CopyFromVec(
+      info_.word_embedding_mat.Row(feats_chunk.Row(0).Sum()));
+  computer_.AcceptInput("input", &input_embeddings);
+
+  computer_.Run();
+
+  {
+    // Note: here GetOutput() is used instead of GetOutputDestructive(), since
+    // here we have recurrence that goes directly from the output, and the call
+    // to GetOutputDestructive() would cause a crash on the next chunk.
+    CuMatrix<BaseFloat> output(computer_.GetOutput("output"));
+
+    current_nnet_output_.Resize(0, 0);
+    current_nnet_output_.Swap(&output);
+  }
+  KALDI_ASSERT(current_nnet_output_.NumRows() == info_.frames_per_chunk &&
+               current_nnet_output_.NumCols() == info_.nnet_output_dim);
+
+  current_log_post_offset_ = 0;
+}
+
+
+}  // namespace nnet3
+}  // namespace kaldi
diff --git a/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.h b/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.h
new file mode 100644
index 00000000000..40259999a17
--- /dev/null
+++ b/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.h
@@ -0,0 +1,187 @@
+// rnnlm/kaldi-rnnlm-decodable-simple-looped.h
+
+// Copyright 2017  Johns Hopkins University (author: Daniel Povey)
+//                 2017  Yiming Wang
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_RNNLM_DECODABLE_SIMPLE_LOOPED_H_
+#define KALDI_RNNLM_DECODABLE_SIMPLE_LOOPED_H_
+
+#include <vector>
+#include "base/kaldi-common.h"
+#include "gmm/am-diag-gmm.h"
+#include "hmm/transition-model.h"
+#include "itf/decodable-itf.h"
+#include "nnet3/nnet-optimize.h"
+#include "nnet3/nnet-compute.h"
+#include "nnet3/am-nnet-simple.h"
+#include "rnnlm/rnnlm-core-compute.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+// See also nnet-am-decodable-simple.h, which is a decodable object that's
+// based on breaking up the input into fixed chunks.  The decodable object
+// defined here is based on 'looped' computations, which naturally handle
+// infinite left-context (but are only ideal for systems that have only
+// recurrence in the forward direction,
+// i.e. not BLSTMs... because there isn't a natural way to enforce extra right
+// context for each chunk.)
+
+
+// Note: the 'simple' in the name means it applies to networks for which
+// IsSimpleNnet(nnet) would return true.  'looped' means we use looped
+// computations, with a kGotoLabel statement at the end of it.
+struct DecodableRnnlmSimpleLoopedComputationOptions {
+  int32 extra_left_context_initial;
+  int32 frames_per_chunk;
+  bool debug_computation;
+  NnetOptimizeOptions optimize_config;
+  NnetComputeOptions compute_config;
+  DecodableRnnlmSimpleLoopedComputationOptions():
+      extra_left_context_initial(0),
+      frames_per_chunk(1),
+      debug_computation(false) { }
+
+  void Check() const {
+    KALDI_ASSERT(extra_left_context_initial >= 0 && frames_per_chunk > 0);
+  }
+
+  void Register(OptionsItf *opts) {
+    opts->Register("extra-left-context-initial", &extra_left_context_initial,
+                   "Extra left context to use at the first frame of an utterance (note: "
+                   "this will just consist of repeats of the first frame, and should not "
+                   "usually be necessary).");
+    opts->Register("frames-per-chunk", &frames_per_chunk,
+                   "Number of frames in each chunk that is separately evaluated "
+                   "by the neural net.");
+    opts->Register("debug-computation", &debug_computation, "If true, turn on "
+                   "debug for the actual computation (very verbose!)");
+
+    // register the optimization options with the prefix "optimization".
+    ParseOptions optimization_opts("optimization", opts);
+    optimize_config.Register(&optimization_opts);
+
+    // register the compute options with the prefix "computation".
+    ParseOptions compute_opts("computation", opts);
+    compute_config.Register(&compute_opts);
+  }
+};
+
+
+/**
+   When you instantiate class DecodableRnnlmSimpleLooped, you should give it
+   a const reference to this class, that has been previously initialized.
+ */
+class DecodableRnnlmSimpleLoopedInfo {
+ public:
+  DecodableRnnlmSimpleLoopedInfo(
+      const DecodableRnnlmSimpleLoopedComputationOptions &opts,
+      const kaldi::nnet3::Nnet &rnnlm,
+      const CuMatrix<BaseFloat> &word_embedding_mat);
+
+  void Init(const DecodableRnnlmSimpleLoopedComputationOptions &opts,
+            const kaldi::nnet3::Nnet &rnnlm,
+            const CuMatrix<BaseFloat> &word_embedding_mat);
+
+  const DecodableRnnlmSimpleLoopedComputationOptions &opts;
+
+  const kaldi::nnet3::Nnet &rnnlm;
+  const CuMatrix<BaseFloat> &word_embedding_mat;
+
+  // frames_left_context equals the model left context plus the value of the
+  // --extra-left-context-initial option.
+  int32 frames_left_context;
+  // frames_right_context is the same as the right-context of the model.
+  int32 frames_right_context;
+  // The frames_per_chunk equals the number of input frames we need for each
+  // chunk (except for the first chunk).
+  int32 frames_per_chunk;
+
+  // The output dimension of the neural network (not the final output).
+  int32 nnet_output_dim;
+
+  // The 3 computation requests that are used to create the looped
+  // computation are stored in the class, as we need them to work out
+  // exactly which iVectors are needed.
+  ComputationRequest request1, request2, request3;
+
+  // The compiled, 'looped' computation.
+  NnetComputation computation;
+};
+
+/*
+  This class handles the neural net computation; it's mostly accessed
+  via other wrapper classes.
+
+  It accepts just input features. */
+class DecodableRnnlmSimpleLooped {
+ public:
+  /**
+     This constructor takes features as input.
+     Note: it stores references to all arguments to the constructor, so don't
+     delete them till this goes out of scope.
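+     Each DecodableRnnlmSimpleLooped builds its own NnetComputer on top of
+     the looped computation pre-compiled in 'info', so a single
+     DecodableRnnlmSimpleLoopedInfo can be shared by many decodable objects.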
+
+     @param [in] info   This helper class contains all the static pre-computed
+                        information this class needs, and contains a pointer
+                        to the neural net.
+     @param [in] feats  The input feature matrix.
+  */
+  DecodableRnnlmSimpleLooped(const DecodableRnnlmSimpleLoopedInfo &info);
+
+  // returns the number of frames of likelihoods.  The same as feats_.NumRows()
+  inline int32 NumFrames() const { return num_frames_; }
+
+  inline int32 NnetOutputDim() const { return info_.nnet_output_dim; }
+
+  // Gets the nnet's output for a particular frame, with
+  // 0 <= frame < NumFrames().  'output' must be correctly sized (with
+  // dimension NnetOutputDim()).  Note: you're expected to call this, and
+  // GetOutput(), in an order of increasing frames.  If you deviate from this,
+  // one of these calls may crash.
+  void GetNnetOutputForFrame(int32 frame, VectorBase<BaseFloat> *output);
+
+  // Updates feats_ with the new incoming word specified in word_indexes
+  void TakeFeatures(const std::vector<int32> &word_indexes);
+
+  // Gets the output for a particular frame and word_index, with
+  // 0 <= frame < NumFrames().
+  BaseFloat GetOutput(int32 frame, int32 word_index);
+
+ private:
+  // This function does the computation for the next chunk.
+  void AdvanceChunk();
+
+  const DecodableRnnlmSimpleLoopedInfo &info_;
+
+  NnetComputer computer_;
+
+  SparseMatrix<BaseFloat> feats_;
+
+  int32 num_frames_;
+
+  // The current nnet's output that we got from the last time we
+  // ran the computation.
+  Matrix<BaseFloat> current_nnet_output_;
+
+  // The time-offset of the current log-posteriors; equals
+  // -1 when initialized, or 0 once AdvanceChunk() has been called.
+  int32 current_log_post_offset_;
+};
+
+
+}  // namespace nnet3
+}  // namespace kaldi
+
+#endif  // KALDI_RNNLM_DECODABLE_SIMPLE_LOOPED_H_

From 699c9566ce7c70eeb588be4a30418ed1998cb3a5 Mon Sep 17 00:00:00 2001
From: Hainan Xu
Date: Wed, 20 Sep 2017 17:18:17 -0400
Subject: [PATCH 02/23] lattice-rescoring draft finished

---
 src/latbin/Makefile                              |  13 +-
 ...lm.cc => lattice-lmrescore-kaldi-rnnlm.cc}    |  10 +-
 src/rnnlm/Makefile                               |   2 +-
 ...ed.cc => rnnlm-decodable-simple-looped.cc}    |   4 +-
 ...oped.h => rnnlm-decodable-simple-looped.h}    |   0
 src/rnnlm/rnnlm-lattice-rescoring.cc             | 161 ++++++++++++++++++
 src/rnnlm/rnnlm-lattice-rescoring.h              |  88 ++++++++++
 7 files changed, 267 insertions(+), 11 deletions(-)
 rename src/latbin/{lattice-lmrescore-nnet3-rnnlm.cc => lattice-lmrescore-kaldi-rnnlm.cc} (95%)
 rename src/rnnlm/{kaldi-rnnlm-decodable-simple-looped.cc => rnnlm-decodable-simple-looped.cc} (98%)
 rename src/rnnlm/{kaldi-rnnlm-decodable-simple-looped.h => rnnlm-decodable-simple-looped.h} (100%)
 create mode 100644 src/rnnlm/rnnlm-lattice-rescoring.cc
 create mode 100644 src/rnnlm/rnnlm-lattice-rescoring.h

diff --git a/src/latbin/Makefile b/src/latbin/Makefile
index 43210c0d3e0..2a21d084f1e 100644
--- a/src/latbin/Makefile
+++ b/src/latbin/Makefile
@@ -4,6 +4,9 @@ all:
 EXTRA_CXXFLAGS = -Wno-sign-compare
 include ../kaldi.mk
 
+LDFLAGS += $(CUDA_LDFLAGS)
+LDLIBS += $(CUDA_LDLIBS)
+
 BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
            lattice-lmrescore lattice-scale lattice-union lattice-to-post \
            lattice-determinize lattice-oracle lattice-rmali \
@@ -21,17 +24,19 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
            lattice-confidence lattice-determinize-phone-pruned \
            lattice-determinize-phone-pruned-parallel lattice-expand-ngram \
            lattice-lmrescore-const-arpa lattice-lmrescore-rnnlm nbest-to-prons \
-           lattice-arc-post lattice-determinize-non-compact
+           lattice-arc-post lattice-determinize-non-compact lattice-lmrescore-kaldi-rnnlm
 
 OBJFILES =
 
+cuda-compiled.o: ../kaldi.mk
+
 TESTFILES =
 
-ADDLIBS = ../lat/kaldi-lat.a ../lm/kaldi-lm.a ../fstext/kaldi-fstext.a \
-          ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \
-          ../matrix/kaldi-matrix.a \
+ADDLIBS = ../rnnlm/kaldi-rnnlm.a ../lat/kaldi-lat.a ../nnet3/kaldi-nnet3.a ../lm/kaldi-lm.a \
+          ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
+          ../util/kaldi-util.a \
+          ../cudamatrix/kaldi-cudamatrix.a ../matrix/kaldi-matrix.a \
           ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
diff --git a/src/latbin/lattice-lmrescore-nnet3-rnnlm.cc b/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
similarity index 95%
rename from src/latbin/lattice-lmrescore-nnet3-rnnlm.cc
rename to src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
index 26754d26629..0ff8789608e 100644
--- a/src/latbin/lattice-lmrescore-nnet3-rnnlm.cc
+++ b/src/latbin/lattice-lmrescore-kaldi-rnnlm.cc
@@ -23,8 +23,9 @@
 #include "fstext/fstext-lib.h"
 #include "lat/kaldi-lattice.h"
 #include "lat/lattice-functions.h"
-#include "rnnlm/kaldi-rnnlm-rescoring.h"
+#include "rnnlm/rnnlm-lattice-rescoring.h"
 #include "util/common-utils.h"
+#include "nnet3/nnet-utils.h"
 
 int main(int argc, char *argv[]) {
   try {
@@ -61,15 +62,16 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
-    std::string lats_rspecifier, rnn_wordlist,
+    std::string lats_rspecifier, rnn_wordlist, word_embedding_rxfilename,
         word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier;
-    KALDI_ASSERT(po.NumArgs() == 5);
+    KALDI_ASSERT(po.NumArgs() == 6);
 
     rnn_wordlist = po.GetArg(1);
     word_symbols_rxfilename = po.GetArg(2);
     lats_rspecifier = po.GetArg(3);
     rnnlm_rxfilename = po.GetArg(4);
-    lats_wspecifier = po.GetArg(5);
+    word_embedding_rxfilename = po.GetArg(5);
+    lats_wspecifier = po.GetArg(6);
 
     // Reads the language model.
     kaldi::nnet3::Nnet rnnlm;
diff --git a/src/rnnlm/Makefile b/src/rnnlm/Makefile
index 1e57d2f77ad..04228a08201 100644
--- a/src/rnnlm/Makefile
+++ b/src/rnnlm/Makefile
@@ -10,7 +10,7 @@ TESTFILES = sampler-test sampling-lm-test rnnlm-example-test
 OBJFILES = sampler.o rnnlm-example.o rnnlm-example-utils.o \
            rnnlm-core-training.o rnnlm-embedding-training.o rnnlm-core-compute.o \
            rnnlm-utils.o rnnlm-training.o rnnlm-test-utils.o sampling-lm-estimate.o \
-           sampling-lm.o kaldi-rnnlm-decodable-simple-looped.o
+           sampling-lm.o rnnlm-decodable-simple-looped.o rnnlm-lattice-rescoring.o
 
 LIBNAME = kaldi-rnnlm
 
diff --git a/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc b/src/rnnlm/rnnlm-decodable-simple-looped.cc
similarity index 98%
rename from src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc
rename to src/rnnlm/rnnlm-decodable-simple-looped.cc
index ba298e417d3..c6de5a549a3 100644
--- a/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.cc
+++ b/src/rnnlm/rnnlm-decodable-simple-looped.cc
@@ -18,7 +18,7 @@
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.
 
-#include "rnnlm/kaldi-rnnlm-decodable-simple-looped.h"
+#include "rnnlm/rnnlm-decodable-simple-looped.h"
 #include "nnet3/nnet-utils.h"
 #include "nnet3/nnet-compile-looped.h"
 
@@ -79,7 +79,7 @@ DecodableRnnlmSimpleLooped::DecodableRnnlmSimpleLooped(
     // since every time we provide one chunk to the decodable object, the size
     // of feats_ == frames_per_chunk
     feats_(info_.frames_per_chunk,
-           info_.word_embedding_mat.NumRows()),  // or Cols()? TODO(hxu)
+           info_.word_embedding_mat.NumRows()),  // or Cols()? TODO(hxu), should be vocab size
     current_log_post_offset_(-1)
 {
   num_frames_ = feats_.NumRows();
diff --git a/src/rnnlm/kaldi-rnnlm-decodable-simple-looped.h b/src/rnnlm/rnnlm-decodable-simple-looped.h
similarity index 100%
rename from src/rnnlm/kaldi-rnnlm-decodable-simple-looped.h
rename to src/rnnlm/rnnlm-decodable-simple-looped.h
diff --git a/src/rnnlm/rnnlm-lattice-rescoring.cc b/src/rnnlm/rnnlm-lattice-rescoring.cc
new file mode 100644
index 00000000000..8d7ab58e538
--- /dev/null
+++ b/src/rnnlm/rnnlm-lattice-rescoring.cc
@@ -0,0 +1,161 @@
+// rnnlm/rnnlm-lattice-rescoring.cc
+
+// Copyright 2017  Johns Hopkins University (author: Daniel Povey)
+//                 Yiming Wang
+//                 Hainan Xu
+//
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fstream>
+
+#include "rnnlm/rnnlm-lattice-rescoring.h"
+#include "util/stl-utils.h"
+#include "util/text-utils.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+void KaldiRnnlmDeterministicFst::ReadFstWordSymbolTableAndRnnWordlist(
+    const std::string &rnn_wordlist,
+//    const std::string &rnn_out_wordlist,
+    const std::string &word_symbol_table_rxfilename) {
+  // Reads symbol table.
+  fst::SymbolTable *fst_word_symbols = NULL;
+  if (!(fst_word_symbols =
+        fst::SymbolTable::ReadText(word_symbol_table_rxfilename))) {
+    KALDI_ERR << "Could not read symbol table from file "
+              << word_symbol_table_rxfilename;
+  }
+
+  full_voc_size_ = fst_word_symbols->NumSymbols();
+  fst_label_to_word_.resize(full_voc_size_);
+
+  for (int32 i = 0; i < fst_label_to_word_.size(); ++i) {
+    fst_label_to_word_[i] = fst_word_symbols->Find(i);
+    if (fst_label_to_word_[i] == "") {
+      KALDI_ERR << "Could not find word for integer " << i << " in the word "
+                << "symbol table; mismatched symbol table, or you have "
+                << "discontinuous integers in your symbol table?";
+    }
+  }
+
+//  fst_label_to_rnn_out_label_.resize(fst_word_symbols->NumSymbols(), -1);
+  fst_label_to_rnn_label_.resize(fst_word_symbols->NumSymbols(), -1);
+
+  out_OOS_index_ = 1;
+  {
+    std::ifstream ifile(rnn_wordlist.c_str());
+    int32 id;
+    std::string word;
+    int32 i = 0;
+    while (ifile >> word >> id) {
+      if (word == "") {
+        KALDI_ASSERT(id == out_OOS_index_);
+      }
+      KALDI_ASSERT(i == id);
+      i++;
+      rnn_label_to_word_.push_back(word);
+
+      int fst_label = fst_word_symbols->Find(rnn_label_to_word_[id]);
+      KALDI_ASSERT(fst::SymbolTable::kNoSymbol != fst_label ||
+                   id == out_OOS_index_ || id == 0);
+      if (id != out_OOS_index_ && out_OOS_index_ != 0) {
+        fst_label_to_rnn_label_[fst_label] = id;
+      }
+    }
+  }
+
+  for (int32 i = 0; i < fst_label_to_rnn_label_.size(); i++) {
+    if (fst_label_to_rnn_label_[i] == -1) {
+      fst_label_to_rnn_label_[i] = out_OOS_index_;
+    }
+  }
+}
+
+KaldiRnnlmDeterministicFst::KaldiRnnlmDeterministicFst(int32 max_ngram_order,
+    const std::string &rnn_wordlist,
+    const std::string &word_symbol_table_rxfilename,
+    const DecodableRnnlmSimpleLoopedInfo &info) {
+  max_ngram_order_ = max_ngram_order;
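+  // max_ngram_order_ bounds how much word history is used to distinguish
+  // states of this on-demand FST: per the --max-ngram-order option in
+  // lattice-lmrescore-kaldi-rnnlm.cc, the RNNLM context is limited to that
+  // many words, so different long histories can map to the same state and the
+  // composed lattice stays a manageable size.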
+  ReadFstWordSymbolTableAndRnnWordlist(rnn_wordlist,
+                                       word_symbol_table_rxfilename);
+
+  std::vector