From 7087c4fca7031f54d7edc9f6d836770254b823ff Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Fri, 12 Aug 2022 05:42:16 -0400 Subject: [PATCH 01/18] fix portaudio install script, closes #4755 (#4773) fixed portaudio archive location fixed case when the directory portaudio exists fixed whitespace formatting --- tools/extras/install_portaudio.sh | 43 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/extras/install_portaudio.sh b/tools/extras/install_portaudio.sh index 571358e140d..3d83a294565 100755 --- a/tools/extras/install_portaudio.sh +++ b/tools/extras/install_portaudio.sh @@ -43,31 +43,32 @@ portaudio_tarball="pa_stable_${VERSION}.tgz" portaudio_github_tarball="pa_stable_${VERSION}_r1788.tar.gz" if [ ! -e $portaudio_tarball ]; then - echo "Could not find portaudio tarball $portaudio_tarball locally, downloading it..." - - if [ -d "$DOWNLOAD_DIR" ]; then - cp -p "$DOWNLOAD_DIR/$portaudio_tarball" . - else - if ! $WGET --version >&/dev/null; then - echo "This script requires you to first install wget" - echo "You can also just download pa_stable_$VERSION.tgz from" - echo "http://www.portaudio.com/download.html)" - exit 1; - fi - $WGET -nv -T 10 -t 3 -O $portaudio_tarball https://github.com/PortAudio/portaudio/archive/refs/tags/${portaudio_github_tarball} || \ - $WGET -nv -T 10 -t 3 -O $portaudio_tarball http://www.portaudio.com/archives/$portaudio_tarball || \ - rm ${portaudio_tarball} - + echo "Could not find portaudio tarball $portaudio_tarball locally, downloading it..." + + if [ -d "$DOWNLOAD_DIR" ]; then + cp -p "$DOWNLOAD_DIR/$portaudio_tarball" . + else + if ! $WGET --version >&/dev/null; then + echo "This script requires you to first install wget" + echo "You can also just download pa_stable_$VERSION.tgz from" + echo "http://www.portaudio.com/download.html" + exit 1; fi - if [ ! -e $portaudio_tarball ]; then - echo "Download of $portaudio_tarball - failed." - echo "Aborting script. Please download and install port audio manually." - exit 1; - fi + $WGET --no-check-certificate -nv -T 10 -t 3 -O $portaudio_tarball https://github.com/PortAudio/portaudio/archive/refs/tags/${portaudio_github_tarball} || \ + $WGET --no-check-certificate -nv -T 10 -t 3 -O $portaudio_tarball http://files.portaudio.com/archives/$portaudio_tarball || \ + rm ${portaudio_tarball} + + fi + + if [ ! -e $portaudio_tarball ]; then + echo "Download of $portaudio_tarball failed." + echo "Aborting script. Please download and install portaudio manually."
+ exit 1; + fi fi -mkdir portaudio && tar -xzf $portaudio_tarball -C portaudio --strip-components 1 || exit 1 +mkdir -p portaudio && tar -xzf $portaudio_tarball -C portaudio --strip-components 1 || exit 1 read -d '' pa_patch << "EOF" --- portaudio/Makefile.in 2012-08-05 10:42:05.000000000 +0300 From 0cf557ed7f991abc5859ac240970cff7aad78a5d Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Fri, 12 Aug 2022 05:43:57 -0400 Subject: [PATCH 02/18] Kaldi recipe for SPGISpeech (#4772) * Kaldi recipe for SPGISpeech * adding readme to spgispeech * fixing some style issues * more syntax checks cleared * more style fixes * yaml format fix * fix issues reported by @desh2608 * remove the conda calls * remove more shellcheck errors * fix one more grumpy shellcheck --- .shellcheck.yaml | 7 + egs/spgispeech/s5/README.txt | 5 + egs/spgispeech/s5/RESULTS | 55 ++++ egs/spgispeech/s5/cmd.sh | 15 + egs/spgispeech/s5/conf/decode.config | 1 + egs/spgispeech/s5/conf/mfcc.conf | 1 + egs/spgispeech/s5/conf/mfcc_hires.conf | 10 + egs/spgispeech/s5/conf/online_cmvn.conf | 1 + egs/spgispeech/s5/local/chain/compare_wer.sh | 139 +++++++++ .../s5/local/chain/run_chain_common.sh | 82 ++++++ egs/spgispeech/s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/run_tdnn_lstm.sh | 1 + .../s5/local/chain/tuning/run_tdnn_1a.sh | 253 ++++++++++++++++ .../s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 278 ++++++++++++++++++ egs/spgispeech/s5/local/g2p.sh | 45 +++ egs/spgispeech/s5/local/g2p/train_g2p.sh | 88 ++++++ .../s5/local/nnet3/run_ivector_common.sh | 153 ++++++++++ egs/spgispeech/s5/local/prepare_dict.sh | 121 ++++++++ .../s5/local/run_cleanup_segmentation.sh | 59 ++++ egs/spgispeech/s5/local/score.sh | 1 + egs/spgispeech/s5/local/train_lms_srilm.sh | 265 +++++++++++++++++ egs/spgispeech/s5/path.sh | 9 + egs/spgispeech/s5/run.sh | 131 +++++++++ egs/spgispeech/s5/steps | 1 + egs/spgispeech/s5/utils | 1 + 25 files changed, 1723 insertions(+) create mode 100644 .shellcheck.yaml create mode 100644 egs/spgispeech/s5/README.txt create mode 100644 egs/spgispeech/s5/RESULTS create mode 100644 egs/spgispeech/s5/cmd.sh create mode 100644 egs/spgispeech/s5/conf/decode.config create mode 100644 egs/spgispeech/s5/conf/mfcc.conf create mode 100644 egs/spgispeech/s5/conf/mfcc_hires.conf create mode 100644 egs/spgispeech/s5/conf/online_cmvn.conf create mode 100755 egs/spgispeech/s5/local/chain/compare_wer.sh create mode 100755 egs/spgispeech/s5/local/chain/run_chain_common.sh create mode 120000 egs/spgispeech/s5/local/chain/run_tdnn.sh create mode 120000 egs/spgispeech/s5/local/chain/run_tdnn_lstm.sh create mode 100755 egs/spgispeech/s5/local/chain/tuning/run_tdnn_1a.sh create mode 100755 egs/spgispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/spgispeech/s5/local/g2p.sh create mode 100755 egs/spgispeech/s5/local/g2p/train_g2p.sh create mode 100755 egs/spgispeech/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/spgispeech/s5/local/prepare_dict.sh create mode 100755 egs/spgispeech/s5/local/run_cleanup_segmentation.sh create mode 120000 egs/spgispeech/s5/local/score.sh create mode 100755 egs/spgispeech/s5/local/train_lms_srilm.sh create mode 100755 egs/spgispeech/s5/path.sh create mode 100755 egs/spgispeech/s5/run.sh create mode 120000 egs/spgispeech/s5/steps create mode 120000 egs/spgispeech/s5/utils diff --git a/.shellcheck.yaml b/.shellcheck.yaml new file mode 100644 index 00000000000..0671c62c4d7 --- /dev/null +++ b/.shellcheck.yaml @@ -0,0 +1,7 @@ +# Reference: +# # 
https://github.com/codefactor-io/shellcheck-config # # https://github.com/koalaman/shellcheck/wiki/Checks ignored: -SC2181 -SC2145 + diff --git a/egs/spgispeech/s5/README.txt b/egs/spgispeech/s5/README.txt new file mode 100644 index 00000000000..cd47723ffb8 --- /dev/null +++ b/egs/spgispeech/s5/README.txt @@ -0,0 +1,5 @@ +A recipe for the SPGISpeech corpus (https://datasets.kensho.com/datasets/spgispeech). +The workflow is based on egs/librispeech and provides numbers comparable with the baseline in https://arxiv.org/abs/2104.02014 +(see the RESULTS file for exact results). +This recipe uses Lhotse (https://lhotse.readthedocs.io/) for data preparation. + diff --git a/egs/spgispeech/s5/RESULTS b/egs/spgispeech/s5/RESULTS new file mode 100644 index 00000000000..56d70ac4959 --- /dev/null +++ b/egs/spgispeech/s5/RESULTS @@ -0,0 +1,55 @@ +# In the results below, "tgsmall" is a 3-gram LM, which is used for lattice generation. +# The following language model is then used for rescoring: +# *) "rescored" - a non-pruned maximum-entropy 4-gram LM + +## For documentation purposes, we include the perplexities of the individual LMs -- see the training script local/train_lms_srilm.sh for details +data/srilm/7gram.me.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -8902029 ppl= 64.32612 ppl1= 76.49254 +data/srilm/5gram.me.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -8935219 ppl= 65.33255 ppl1= 77.73952 +data/srilm/6gram.me.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -8936829 ppl= 65.38179 ppl1= 77.80054 +data/srilm/4gram.me.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9050586 ppl= 68.95498 ppl1= 82.23427 +data/srilm/4gram.gt0112.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9184350 ppl= 73.74063 ppl1= 88.20471 +data/srilm/4gram.kn0122.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9196606 ppl= 73.82929 ppl1= 88.29782 +data/srilm/4gram.gt0122.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9194675 ppl= 74.09803 ppl1= 88.65006 +data/srilm/4gram.kn0112.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9207785 ppl= 74.21637 ppl1= 88.78006 +data/srilm/4gram.gt0113.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9198757 ppl= 74.23981 ppl1= 88.82675 +data/srilm/4gram.gt0123.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9209083 ppl= 74.59962 ppl1= 89.27524 +data/srilm/4gram.kn0222.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9223550 ppl= 74.76567 ppl1= 89.46459 +data/srilm/4gram.kn0123.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9235385 ppl= 75.1807 ppl1= 89.98194 +data/srilm/4gram.kn0113.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9242119 ppl= 75.41787 ppl1= 90.27763 +data/srilm/4gram.gt0222.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9237725 ppl= 75.60686 ppl1= 90.53118 +data/srilm/4gram.gt0111.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9247490 ppl= 75.95336 ppl1= 90.96339 +data/srilm/4gram.gt0223.gz file data/srilm/dev.txt: 196610
sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9252133 ppl= 76.11867 ppl1= 91.16962 +data/srilm/4gram.kn0223.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9264101 ppl= 76.19736 ppl1= 91.24972 +data/srilm/4gram.kn0111.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9268668 ppl= 76.36029 ppl1= 91.45296 +data/srilm/3gram.me.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9318474 ppl= 78.16015 ppl1= 93.69934 +data/srilm/3gram.kn012.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9408681 ppl= 81.52866 ppl1= 97.90927 +data/srilm/3gram.gt011.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9399744 ppl= 81.56597 ppl1= 97.97564 +data/srilm/3gram.kn011.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9411733 ppl= 81.64512 ppl1= 98.05495 +data/srilm/3gram.gt012.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9410070 ppl= 81.96129 ppl1= 98.47032 +data/srilm/3gram.kn022.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9435192 ppl= 82.54594 ppl1= 99.18209 +data/srilm/3gram.gt022.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9453120 ppl= 83.63024 ppl1= 100.5598 +data/srilm/3gram.kn023.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -9483614 ppl= 84.43693 ppl1= 101.5498 +data/srilm/3gram.gt023.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -9494343 ppl= 85.26021 ppl1= 102.6022 +data/srilm/2gram.gt01.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -1.031115e+07 ppl= 124.9826 ppl1= 152.8191 +data/srilm/2gram.me.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -1.033317e+07 ppl= 125.6362 ppl1= 153.6176 +data/srilm/2gram.kn01.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -1.033766e+07 ppl= 125.9001 ppl1= 153.9539 +data/srilm/2gram.kn02.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 0 zeroprobs, logprob= -1.036267e+07 ppl= 127.3817 ppl1= 155.8414 +data/srilm/2gram.gt02.gz file data/srilm/dev.txt: 196610 sentences, 4726024 words, 0 OOVs 5187 zeroprobs, logprob= -1.03542e+07 ppl= 127.5276 ppl1= 156.0618 +## +# + +# Bare TDNN-F model +#for x in exp/chain_cleaned/tdnn_1a_sp/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done +%WER 5.96 [ 56419 / 946469, 11482 ins, 9916 del, 35021 sub ] exp/chain_cleaned/tdnn_1a_sp/decode_val_rescored/wer_10_0.5 +%WER 6.30 [ 59597 / 946469, 14063 ins, 8943 del, 36591 sub ] exp/chain_cleaned/tdnn_1a_sp/decode_val_tgsmall/wer_11_0.0 +#for x in exp/chain_cleaned/tdnn_1a_sp_online/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done +%WER 5.97 [ 56463 / 946469, 11537 ins, 9884 del, 35042 sub ] exp/chain_cleaned/tdnn_1a_sp_online/decode_val_rescored/wer_10_0.5 +%WER 6.30 [ 59616 / 946469, 12326 ins, 10792 del, 36498 sub ] exp/chain_cleaned/tdnn_1a_sp_online/decode_val_tgsmall/wer_11_0.5 + +#TDNN+LSTM model +# for x in exp/chain_cleaned/tdnn_lstm_1a_sp/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done +%WER 5.79 [ 54844 / 946469, 9971 ins, 10397 del, 34476 sub ] exp/chain_cleaned/tdnn_lstm_1a_sp/decode_val_rescored/wer_9_0.5 +%WER 
6.04 [ 57165 / 946469, 10304 ins, 11651 del, 35210 sub ] exp/chain_cleaned/tdnn_lstm_1a_sp/decode_val_tgsmall/wer_10_0.5 +# for x in exp/chain_cleaned/tdnn_lstm_1a_sp_online/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done +%WER 5.60 [ 53036 / 946469, 10812 ins, 8601 del, 33623 sub ] exp/chain_cleaned/tdnn_lstm_1a_sp_online/decode_val_rescored/wer_10_0.0 +%WER 5.84 [ 55282 / 946469, 9973 ins, 10879 del, 34430 sub ] exp/chain_cleaned/tdnn_lstm_1a_sp_online/decode_val_tgsmall/wer_11_0.5 diff --git a/egs/spgispeech/s5/cmd.sh b/egs/spgispeech/s5/cmd.sh new file mode 100644 index 00000000000..71dd849a93b --- /dev/null +++ b/egs/spgispeech/s5/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/spgispeech/s5/conf/decode.config b/egs/spgispeech/s5/conf/decode.config new file mode 100644 index 00000000000..7ba966f2b83 --- /dev/null +++ b/egs/spgispeech/s5/conf/decode.config @@ -0,0 +1 @@ +# empty config, just use the defaults. diff --git a/egs/spgispeech/s5/conf/mfcc.conf b/egs/spgispeech/s5/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/spgispeech/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/spgispeech/s5/conf/mfcc_hires.conf b/egs/spgispeech/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..3fb460822ac --- /dev/null +++ b/egs/spgispeech/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. 
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/spgispeech/s5/conf/online_cmvn.conf b/egs/spgispeech/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/spgispeech/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/spgispeech/s5/local/chain/compare_wer.sh b/egs/spgispeech/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..b12ef0200b7 --- /dev/null +++ b/egs/spgispeech/s5/local/chain/compare_wer.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "$0: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "# WER on val(tgsmall) " + "# WER on val(fglarge) " + ) + +for n in 0 1 ; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(val_tgsmall val_rescored) + + wer=$( (grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 2>/dev/null) + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Num-parameters " +for x in $*; do + num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}') + printf "% 10d" $num_params +done +echo diff --git a/egs/spgispeech/s5/local/chain/run_chain_common.sh b/egs/spgispeech/s5/local/chain/run_chain_common.sh new file mode 100755 index 00000000000..8ec52969231 --- /dev/null +++ b/egs/spgispeech/s5/local/chain/run_chain_common.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +# this script has common stages shared across spgispeech chain recipes. +# It generates a new topology in a new lang directory, gets the alignments as +# lattices, and builds a tree for the new topology +set -e + +stage=11 + +# input directory names. These options are actually compulsory, and they have +# been named for convenience +gmm_dir= +ali_dir= +lores_train_data_dir= + +num_leaves=6000 + +# output directory names. They are also compulsory. +lang= +lat_dir= +tree_dir= +# End configuration section. +echo "$0" "$@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1; +[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1; +[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1; + +for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do + [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 +done + +if [ $stage -le 11 ]; then + echo "$0: creating lang directory with one state per phone." + # Create a version of the lang/ directory that has one state per phone in the + # topo file. [note, it really has two states.. the first one is only repeated + # once, the second one has zero or more repeats.] + if [ -d $lang ]; then + if [ $lang/L.fst -nt data/lang/L.fst ]; then + echo "$0: $lang already exists, not overwriting it; continuing" + else + echo "$0: $lang already exists and seems to be older than data/lang..." + echo " ... not sure what to do. Exiting." + exit 1; + fi + else + cp -r data/lang $lang + silphonelist=$(cat $lang/phones/silence.csl) || exit 1; + nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1; + # Use our special topology... note that later on may have to tune this + # topology. + steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo + fi +fi + +if [ $stage -le 12 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
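+ # (lattice supervision lets the chain objective marginalize over alternative pronunciations and alignment paths instead of committing to one forced alignment)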
+ # use the same num-jobs as the alignments + nj=$(cat ${ali_dir}/num_jobs) || exit 1; + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + $lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir +fi + +exit 0; diff --git a/egs/spgispeech/s5/local/chain/run_tdnn.sh b/egs/spgispeech/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/spgispeech/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/spgispeech/s5/local/chain/run_tdnn_lstm.sh b/egs/spgispeech/s5/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/spgispeech/s5/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/spgispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/spgispeech/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..28b31049292 --- /dev/null +++ b/egs/spgispeech/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,253 @@ +#!/usr/bin/env bash +set -e +# config taken from librispeech run_tdnn_1d.sh + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_1a_sp +# exp/chain_cleaned/tdnn_1a_sp: num-iters=2496 nj=4..16 num-params=20.7M dim=40+100->6088 combine=-0.043->-0.042 (over 10) xent:train/valid[1661,2495,final]=(-0.983,-0.777,-0.763/-0.920,-0.729,-0.720) logprob:train/valid[1661,2495,final]=(-0.061,-0.046,-0.045/-0.053,-0.040,-0.040) + +# local/chain/compare_wer.sh --online exp/chain_cleaned/tdnn_1a_sp +# System tdnn_1a_sp +# WER on val(tgsmall) 6.30 +# [online:] 6.30 +# WER on val(fglarge) 5.96 +# [online:] 5.97 +# Final train prob -0.0450 +# Final valid prob -0.0397 +# Final train prob (xent) -0.7635 +# Final valid prob (xent) -0.7196 +# Num-parameters 20699536 + +# configs for 'chain' +stage=0 +decode_nj=32 +train_set=train_cleaned +gmm=tri5b_cleaned +nnet3_affix=_cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# TDNN options +frames_per_eg=150,110,100 +remove_egs=true +common_egs_dir= +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +test_online_decoding=true # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0" "$@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b{09,10,11,12}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0 --constrained false" \ + --egs.chunk-width $frames_per_eg \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 2500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 4 \ + --trainer.optimization.num-jobs-step 4 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.00015 \ + --trainer.optimization.final-effective-lrate 0.000015 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; + +fi + +graph_dir=$dir/graph +if [ $stage -le 16 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test $dir $graph_dir +fi + +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 17 ]; then + rm $dir/.error 2>/dev/null || true + # shellcheck disable=SC2043 + for decode_set in val ; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test data/lang_test \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,rescored} || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 18 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor $dir ${dir}_online + + + rm $dir/.error 2>/dev/null || true + # shellcheck disable=SC2043 + for data in val; do + ( + # note: we just give it "data/${data}" as it only uses the wav.scp, the + # feature type does not matter. 
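+ # (note: the online decode below recomputes MFCCs and i-vectors on the fly from wav.scp, using the configuration that prepare_online_decoding.sh bundled into ${dir}_online)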
+ steps/online/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" \ + $graph_dir data/${data} ${dir}_online/decode_${data}_tgsmall || exit 1 + + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test data/lang_test \ + data/${data} ${dir}_online/decode_${data}_{tgsmall,rescored} || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0; diff --git a/egs/spgispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/spgispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..04b986a431d --- /dev/null +++ b/egs/spgispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,278 @@ +#!/usr/bin/env bash +set -e +# config taken from librispeech run_tdnn_lstm_1d.sh + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm_1a_sp +# exp/chain_cleaned/tdnn_lstm_1a_sp: num-iters=6237 nj=4..16 num-params=45.3M dim=40+100->6088 combine=-0.033->-0.032 (over 3) xent:train/valid[4153,6236,final]=(-1.29,-0.857,-0.857/-1.31,-0.872,-0.872) logprob:train/valid[4153,6236,final]=(-0.046,-0.032,-0.032/-0.051,-0.035,-0.035) + +# local/chain/compare_wer.sh --online exp/chain_cleaned/tdnn_lstm_1a_sp +# System tdnn_lstm_1a_sp +# WER on val(tgsmall) 6.04 +# [online:] 5.84 +# WER on val(fglarge) 5.79 +# [online:] 5.60 +# Final train prob -0.0324 +# Final valid prob -0.0353 +# Final train prob (xent) -0.8566 +# Final valid prob (xent) -0.8718 +# Num-parameters 45294736 + +# configs for 'chain' +stage=14 +decode_nj=32 +train_set=train_cleaned +gmm=tri5b_cleaned +nnet3_affix=_cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# LSTM training options +frames_per_chunk=140,100,160 +remove_egs=true +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +chunk_left_context=40 +chunk_right_context=0 +self_repair_scale=0.00001 +label_delay=5 +# decode options +extra_left_context=50 +extra_right_context=0 + +common_egs_dir= +xent_regularize=0.025 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +test_online_decoding=true # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0" "$@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=1280 + linear-component name=tdnn2l dim=320 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn3l dim=320 $linear_opts + relu-batchnorm-layer name=tdnn3 $opts dim=1280 + linear-component name=tdnn4l dim=320 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn5l dim=320 $linear_opts + relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) + linear-component name=tdnn6l dim=320 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=lstm1l dim=320 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm1 cell-dim=1536 recurrent-projection-dim=384 non-recurrent-projection-dim=384 delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 + linear-component name=tdnn8l dim=320 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=lstm2l dim=320 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm2 cell-dim=1536 recurrent-projection-dim=384 non-recurrent-projection-dim=384 delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 + linear-component name=tdnn10l dim=320 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=lstm3l dim=320 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm3 cell-dim=1536 recurrent-projection-dim=384 non-recurrent-projection-dim=384 delay=-3 dropout-proportion=0.0 $lstm_opts + + output-layer name=output input=lstm3 include-log-softmax=false $output_opts + + output-layer name=output-xent input=lstm3 learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b{09,10,11,12}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0 --constrained false" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 4 \ + --trainer.optimization.num-jobs-step 4 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; + +fi + +graph_dir=$dir/graph +if [ $stage -le 16 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test $dir $graph_dir +fi + +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 17 ]; then + rm $dir/.error 2>/dev/null || true + # shellcheck disable=SC2043 + for decode_set in val ; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test data/lang_test \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,rescored} || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 18 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. 
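+ # (prepare_online_decoding.sh packages the hires MFCC config and the i-vector extractor together with the model into ${dir}_online, so decoding can run straight from audio)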
+ steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor $dir ${dir}_online + + + rm $dir/.error 2>/dev/null || true + # shellcheck disable=SC2043 + for data in val; do + ( + # note: we just give it "data/${data}" as it only uses the wav.scp, the + # feature type does not matter. + steps/online/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + $graph_dir data/${data} ${dir}_online/decode_${data}_tgsmall || exit 1 + + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test data/lang_test \ + data/${data} ${dir}_online/decode_${data}_{tgsmall,rescored} || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0; diff --git a/egs/spgispeech/s5/local/g2p.sh b/egs/spgispeech/s5/local/g2p.sh new file mode 100755 index 00000000000..4b9dcf37ae8 --- /dev/null +++ b/egs/spgispeech/s5/local/g2p.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +# Copyright 2014 Vassil Panayotov +# Apache 2.0 + +# Auto-generates pronunciations using Sequitur G2P + +. ./path.sh || exit 1 + + +if [ $# -ne 3 ]; then + echo "Usage: $0 <vocab> <g2p-model-dir> <output-lexicon>" + echo "e.g.: $0 data/local/dict/g2p/vocab_autogen.1 /export/a15/vpanayotov/data/g2p data/local/dict/g2p/lexicon_autogen.1" + echo ", where:" + echo " <vocab> - input vocabulary, i.e. the words for which we want to generate pronunciations" + echo " <g2p-model-dir> - source directory where the G2P model is located" + echo " <output-lexicon> - the output, i.e. the generated pronunciations" + exit 1 +fi + +vocab=$1 +g2p_model_dir=$2 +out_lexicon=$3 + +[ ! -f $vocab ] && echo "Can't find the G2P input file: $vocab" && exit 1; + +sequitur_model=$g2p_model_dir/model-5 + +# It turns out that Sequitur has a bug that makes it skip pronunciations +# for some (admittedly peculiar) words. We manually specify these exceptions below +g2p_exceptions="hh hh" # more such entries can be added, separated by "\n" + +[ ! -f $sequitur_model ] && echo "Can't find the Sequitur model file: $sequitur_model" && exit 1 +set -x +g2p.py \ + --model=$sequitur_model --apply $vocab | awk '{print tolower($0);}' \ + >${out_lexicon}.tmp || exit 1 + + +awk 'NR==FNR{p[$1]=$0; next;} {if ($1 in p) print p[$1]; else print}' \ + <(echo -e $g2p_exceptions) ${out_lexicon}.tmp >$out_lexicon || exit 1 + +rm ${out_lexicon}.tmp + +exit 0 diff --git a/egs/spgispeech/s5/local/g2p/train_g2p.sh b/egs/spgispeech/s5/local/g2p/train_g2p.sh new file mode 100755 index 00000000000..28fa350ddba --- /dev/null +++ b/egs/spgispeech/s5/local/g2p/train_g2p.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash + +# Copyright 2014 Vassil Panayotov +# Apache 2.0 + +# Trains Sequitur G2P models on CMUdict + +# can be used to skip some of the initial steps +stage=1 + +. utils/parse_options.sh || exit 1 +. ./path.sh || exit 1 + +if [ $# -ne 2 ]; then + echo "Usage: $0 <cmudict-dir> <g2p-model-dir>" + echo "e.g.: $0 data/local/dict/cmudict data/local/g2p_model" + exit 1 +fi + +cmudict_dir=$1 +g2p_dir=$2 + +mkdir -p $cmudict_dir +mkdir -p $g2p_dir + +cmudict_plain=$g2p_dir/cmudict.0.7a.plain +cmudict_clean=$g2p_dir/cmudict.0.7a.clean + +if [ $stage -le 2 ]; then + echo "Removing the pronunciation variant markers ..." + grep -v ';;;' $cmudict_dir/cmudict.0.7a | \ + perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; print; }' \ + > $cmudict_plain || exit 1; + echo "Removing special pronunciations (not helpful for G2P modelling)..."
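+ # (the filter below keeps only entries starting with A-Z, dropping CMUdict punctuation entries such as "!EXCLAMATION-POINT")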
+ #egrep -v '^[^A-Z]' $cmudict_plain >$cmudict_clean + grep -E -v '^[^A-Z]' $cmudict_plain >$cmudict_clean +fi + +model_1=$g2p_dir/model-1 +sequitur=g2p.py +if [ $stage -le 3 ]; then + echo "Training first-order G2P model (log in '$g2p_dir/model-1.log') ..." + $sequitur \ + --train $cmudict_clean --devel 5% --write-model $model_1 >$g2p_dir/model-1.log 2>&1 || exit 1 +fi + +model_2=$g2p_dir/model-2 + +if [ $stage -le 4 ]; then + echo "Training second-order G2P model (log in '$g2p_dir/model-2.log') ..." + PYTHONPATH=$sequitur_path:$PYTHONPATH $PYTHON $sequitur \ + --model $model_1 --ramp-up --train $cmudict_clean \ + --devel 5% --write-model $model_2 \ + >$g2p_dir/model-2.log 2>&1 || exit 1 +fi + +model_3=$g2p_dir/model-3 + +if [ $stage -le 5 ]; then + echo "Training third-order G2P model (log in '$g2p_dir/model-3.log') ..." + PYTHONPATH=$sequitur_path:$PYTHONPATH $PYTHON $sequitur \ + --model $model_2 --ramp-up --train $cmudict_clean \ + --devel 5% --write-model $model_3 \ + >$g2p_dir/model-3.log 2>&1 || exit 1 +fi + +model_4=$g2p_dir/model-4 + +if [ $stage -le 6 ]; then + echo "Training fourth-order G2P model (log in '$g2p_dir/model-4.log') ..." + PYTHONPATH=$sequitur_path:$PYTHONPATH $PYTHON $sequitur \ + --model $model_3 --ramp-up --train $cmudict_clean \ + --devel 5% --write-model $model_4 \ + >$g2p_dir/model-4.log 2>&1 || exit 1 +fi + +model_5=$g2p_dir/model-5 + +if [ $stage -le 7 ]; then + echo "Training fifth-order G2P model (log in '$g2p_dir/model-5.log') ..." + PYTHONPATH=$sequitur_path:$PYTHONPATH $PYTHON $sequitur \ + --model $model_4 --ramp-up --train $cmudict_clean \ + --devel 5% --write-model $model_5 \ + >$g2p_dir/model-5.log 2>&1 || exit 1 +fi + +echo "G2P training finished OK!" +exit 0 diff --git a/egs/spgispeech/s5/local/nnet3/run_ivector_common.sh b/egs/spgispeech/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..595054bb6c7 --- /dev/null +++ b/egs/spgispeech/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +set -e -o pipefail + + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +train_set=train_960_cleaned # you might set this to e.g. train_960 +gmm=tri6b_cleaned # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. +num_threads_ubm=16 +num_processes=4 +nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained on high-resolution data, we still have to + # perturb the normal data to get the alignment.
_sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + echo "$0: fixing input data-dir to remove nonexistent features, in case some " + echo ".. speed-perturbed segments were too short." + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + if [ -f $ali_dir/ali.1.gz ]; then + echo "$0: alignments in $ali_dir appear to already exist. Please either remove them " + echo " ... or use a later --stage option." + exit 1 + fi + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/mfcc/librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ; do + steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires + done + + # now create a data subset. 60k is 1/5th of the training dataset (around 200 hours). + utils/subset_data_dir.sh data/${train_set}_sp_hires 60000 data/${train_set}_sp_hires_60k +fi + + +if [ $stage -le 4 ]; then + echo "$0: making a subset of data to train the diagonal UBM and the PCA transform." + # We'll one hundredth of the data, since Librispeech is very large. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l " + echo "e.g.: /export/a15/vpanayotov/data/lm /export/a15/vpanayotov/data/g2p data/local/dict" + echo "Options:" + echo " --cmd '' # script to launch jobs with, default: run.pl" + echo " --nj # number of jobs to run, default: 4." + exit 1 +fi + +vocab=$1 +g2p_model_dir=$2 +dst_dir=$3 + +[ ! 
-f $vocab ] && echo "$0: vocabulary file not found at $vocab" && exit 1; + +# this file is either a copy of the lexicon we download from openslr.org/11 or is +# created by the G2P steps below +lexicon_raw_nosil=$dst_dir/lexicon_raw_nosil.txt + +cmudict_plain=data/local/g2p//cmudict.0.7a.plain + +mkdir -p $dst_dir || exit 1; + +if [ $stage -le 1 ]; then + + g2p_dir=$dst_dir/g2p + auto_vocab_prefix="$g2p_dir/vocab_autogen" + auto_lexicon_prefix="$g2p_dir/lexicon_autogen" + + mkdir -p $g2p_dir/log + auto_vocab_splits=$(eval "echo $auto_vocab_prefix.{$(seq -s',' $nj | sed 's/,$//')}") + awk 'NR==FNR{a[$1] = 1; next} !(toupper($1) in a)' $cmudict_plain $vocab |\ + sort | awk '{print toupper($0)}' | tee $g2p_dir/vocab_autogen.full |\ + utils/split_scp.pl /dev/stdin $auto_vocab_splits || exit 1 + + echo "Autogenerating pronunciations for the words in $auto_vocab_prefix.* ..." + $cmd JOB=1:$nj $g2p_dir/log/g2p.JOB.log \ + local/g2p.sh $auto_vocab_prefix.JOB $g2p_model_dir $auto_lexicon_prefix.JOB || exit 1 + + g2p_vocab_size=$(wc -l <$g2p_dir/vocab_autogen.full) + g2p_lex_size=$(wc -l < <(cat $auto_lexicon_prefix.*)) + [[ "$g2p_vocab_size" -eq "$g2p_lex_size" ]] || { echo "Unexpected G2P error"; exit 1; } + sort <(cat $auto_vocab_prefix.*) >$dst_dir/vocab_autogen.txt + sort <(cat $auto_lexicon_prefix.*) >$dst_dir/lexicon_autogen.txt + echo "$(wc -l <$g2p_dir/vocab_autogen.full) pronunciations autogenerated OK" +fi + +if [ $stage -le 2 ]; then + echo "Combining the CMUdict pronunciations with the autogenerated ones ..." + awk 'NR==FNR{a[$1]=1; next} (tolower($1) in a)' $vocab $cmudict_plain |\ + cat - $dst_dir/lexicon_autogen.txt | sort | awk '{print tolower($0);}' >$lexicon_raw_nosil || exit 1 + + raw_lex_size=$(cat $lexicon_raw_nosil | awk '{print $1}' | sort -u | wc -l) + vocab_size=$(wc -l <$vocab) + + [[ "$vocab_size" -eq "$raw_lex_size" ]] || { + echo "Inconsistent lexicon($raw_lex_size) vs vocabulary($vocab_size) size!"; + exit 1; } + + echo "Combined lexicon saved to '$lexicon_raw_nosil'" +fi + + +if [ $stage -le 3 ]; then + silence_phones=$dst_dir/silence_phones.txt + optional_silence=$dst_dir/optional_silence.txt + nonsil_phones=$dst_dir/nonsilence_phones.txt + extra_questions=$dst_dir/extra_questions.txt + + echo "Preparing phone lists and clustering questions" + (echo SIL; echo SPN;) > $silence_phones + echo SIL > $optional_silence + + # nonsilence phones; on each line is a list of phones that correspond + # really to the same base phone. + awk '{for (i=2; i<=NF; ++i) { print $i; gsub(/[0-9]/, "", $i); print $i}}' $lexicon_raw_nosil |\ + sort -u |\ + perl -e 'while(<>){ + chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_"; + $phones_of{$1} .= "$_ "; } + foreach $list (values %phones_of) {print $list . "\n"; } ' | sort \ + > $nonsil_phones || exit 1; + + # A few extra questions that will be added to those obtained by automatically clustering + # the "real" phones. These ask about stress; there's also one for silence. 
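+ # (illustration: the silence line is written first, then the perl snippet appends one line per stress class, e.g. all primary-stress vowels "AA1 AE1 AH1 ..." grouped on one line)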
+ cat $silence_phones | awk '{printf("%s ", $1);} END{printf "\n";}' > $extra_questions || exit 1; + + cat $nonsil_phones | perl -e 'while(<>){ foreach $p (split(" ", $_)) { + $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \ + >> $extra_questions || exit 1; + + echo "$(wc -l <$silence_phones) silence phones saved to: $silence_phones" + echo "$(wc -l <$optional_silence) optional silence saved to: $optional_silence" + echo "$(wc -l <$nonsil_phones) non-silence phones saved to: $nonsil_phones" + echo "$(wc -l <$extra_questions) extra triphone clustering-related questions saved to: $extra_questions" +fi + +if [ $stage -le 4 ]; then + (echo '!SIL SIL'; echo '<SPOKEN_NOISE> SPN'; echo '<UNK> SPN'; ) |\ + cat - $lexicon_raw_nosil | sort | uniq >$dst_dir/lexicon.txt + echo "Lexicon text file saved as: $dst_dir/lexicon.txt" +fi + +exit 0 diff --git a/egs/spgispeech/s5/local/run_cleanup_segmentation.sh b/egs/spgispeech/s5/local/run_cleanup_segmentation.sh new file mode 100755 index 00000000000..59424b66463 --- /dev/null +++ b/egs/spgispeech/s5/local/run_cleanup_segmentation.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Copyright 2016 Vimal Manohar +# 2016 Yiming Wang +# 2016 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script demonstrates how to re-segment training data selecting only the +# "good" audio that matches the transcripts. +# The basic idea is to decode with an existing in-domain acoustic model, and a +# biased language model built from the reference, and then work out the +# segmentation from a ctm-like file. + +# For nnet3 and chain results after cleanup, see the scripts in +# local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh + +# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets +# [will add these later]. + +set -e +set -o pipefail +set -u + +stage=0 +cleanup_stage=0 +data=data/train +cleanup_affix=cleaned +srcdir=exp/tri5b +nj=100 +decode_nj=16 +decode_num_threads=4 + +. ./path.sh +. ./cmd.sh +. ./utils/parse_options.sh + +cleaned_data=${data}_${cleanup_affix} + +dir=${srcdir}_${cleanup_affix}_work +cleaned_dir=${srcdir}_${cleanup_affix} + +if [ $stage -le 1 ]; then + # This does the actual data cleanup. + steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \ + $data data/lang $srcdir $dir $cleaned_data +fi + +if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + $cleaned_data data/lang $srcdir ${srcdir}_ali_${cleanup_affix} +fi + +if [ $stage -le 3 ]; then + steps/train_sat.sh --cmd "$train_cmd" \ + 7000 150000 $cleaned_data data/lang ${srcdir}_ali_${cleanup_affix} ${cleaned_dir} +fi + + +exit 0; diff --git a/egs/spgispeech/s5/local/score.sh b/egs/spgispeech/s5/local/score.sh new file mode 120000 index 00000000000..0afefc3158c --- /dev/null +++ b/egs/spgispeech/s5/local/score.sh @@ -0,0 +1 @@ +../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/spgispeech/s5/local/train_lms_srilm.sh b/egs/spgispeech/s5/local/train_lms_srilm.sh new file mode 100755 index 00000000000..fa7c3644f32 --- /dev/null +++ b/egs/spgispeech/s5/local/train_lms_srilm.sh @@ -0,0 +1,265 @@ +#!/usr/bin/env bash +export LC_ALL=C + +words_file= +train_text= +dev_text= +oov_symbol="<UNK>" + +echo "$0" "$@" + +. ./utils/parse_options.sh || exit 1 + +[ -f path.sh ] && .
./path.sh + +echo "-------------------------------------" +echo "Building an SRILM language model " +echo "-------------------------------------" + +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters." + echo "Script has to be called like this:" + echo " $0 [switches] <datadir> <tgtdir>" + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file=<file> word list file -- <datadir>/lang/words.txt by default" + echo " train_text=<file> <datadir>/train/text is used when not specified" + echo " dev_text=<file> last 10% of the train text is used by default" + echo " oov_symbol=<word> symbol to use for OOV modeling -- <UNK> by default" + exit 1 +fi + +datadir=$1 +tgtdir=$2 +outlm=lm.gz + + +## End of configuration +loc=$(which ngram-count); +if [ -z $loc ]; then + if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... + sdir=$PWD/../../../tools/srilm/bin/i686-m64 + else + sdir=$PWD/../../../tools/srilm/bin/i686 + fi + if [ -f $sdir/ngram-count ]; then + echo Using SRILM tools from $sdir + export PATH=$PATH:$sdir + else + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 + fi +fi + +# Prepare the destination directory +mkdir -p $tgtdir + +for f in $train_text $dev_text; do + [ ! -s $f ] && echo "No such file $f" && exit 1; +done + +if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=$(cat $train_text | wc -l) + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +elif [ ! -z "$train_text" ] && [ ! -z "$dev_text" ] ; then + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + #train_text=$train_text + #dev_text=$dev_text +else + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" +fi + +if [ "$words_file" == "auto" ]; then + sed 's/ /\n/g' $train_text | sort -u | sed '/^ *$/d' > $tgtdir/words.txt + words_file=$tgtdir/words.txt +else + [ -z $words_file ] && words_file=$datadir/lang/words.txt +fi +echo "Using words file: $words_file" + +for f in $words_file $train_text $dev_text; do + [ !
+
+for f in $words_file $train_text $dev_text; do
+  [ ! -s $f ] && echo "No such file $f" && exit 1;
+done
+
+
+# Extract the word list from the training dictionary; exclude special symbols
+sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '<eps>' | grep -v -F "$oov_symbol" > $tgtdir/vocab
+# shellcheck disable=SC2181
+if (($?)); then
+  echo "Failed to create vocab from $words_file"
+  exit 1
+else
+  # wc vocab # doesn't work due to some encoding issues
+  echo vocab contains $(cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}')
+fi
+
+# Kaldi transcript files contain Utterance_ID as the first word; remove it
+cat $train_text | cut -f2- -d' ' > $tgtdir/train.txt
+# shellcheck disable=SC2181
+if (($?)); then
+  echo "Failed to create $tgtdir/train.txt from $train_text"
+  exit 1
+else
+  echo "Removed first word (uid) from every line of $train_text"
+  # wc text.train train.txt # doesn't work due to some encoding issues
+  echo $train_text contains $(cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}')
+  echo train.txt contains $(cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}')
+fi
+
+# Kaldi transcript files contain Utterance_ID as the first word; remove it
+cat $dev_text | cut -f2- -d' ' > $tgtdir/dev.txt
+# shellcheck disable=SC2181
+if (($?)); then
+  echo "Failed to create $tgtdir/dev.txt from $dev_text"
+  exit 1
+else
+  echo "Removed first word (uid) from every line of $dev_text"
+  # wc text.train train.txt # doesn't work due to some encoding issues
+  echo $dev_text contains $(cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}')
+  echo $tgtdir/dev.txt contains $(cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}')
+fi
+
+echo "-------------------"
+echo "Good-Turing 2grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/2gram.gt01.gz -gt1min 0 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/2gram.gt02.gz -gt1min 0 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Kneser-Ney 2grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/2gram.kn01.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/2gram.kn02.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Good-Turing 3grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Kneser-Ney 3grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 
-gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + + +echo "-------------------" +echo "Good-Turing 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text 
$tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+if [ ! -z ${LIBLBFGS} ]; then
+  #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+  #instead of that, we simply output the model in the maxent format and convert it using the "ngram"
+  echo "-------------------"
+  echo "Maxent 2grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 2 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/2gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 3grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 4grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 5grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 5 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/5gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 6grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 6 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/6gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 7grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 7 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/7gram.me.gz || exit 1
+
+fi
+
+
+echo "--------------------"
+echo "Computing perplexity"
+echo "--------------------"
+(
+  for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done
+  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done
+  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done
+  for f in $tgtdir/5gram* ; do ( echo $f; ngram -order 5 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done
+  for f in $tgtdir/6gram* ; do ( echo $f; ngram -order 6 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done
+  for f in $tgtdir/7gram* ; do ( echo $f; ngram -order 7 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done
+) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt
+
+echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "
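A note on the ranking just performed, assuming SRILM's usual `ngram -ppl` report (two lines that `paste -s` joins into one): the perplexity lands in the 15th whitespace-separated field, and with GNU sort the per-key `g` modifier in `-k 15,15g` overrides the global `-r -n` flags for that key, so the table comes out in ascending perplexity and the `head -n 1` calls below pick the best model.

```bash
# Hypothetical one-line entry in the perplexities.txt format (the LM name
# and the numbers are made up for illustration):
line='3gram.kn022.gz file dev.txt: 100 sentences, 2000 words, 3 OOVs 0 zeroprobs, logprob= -4000 ppl= 81.2 ppl1= 95.7'
echo "$line" | awk '{print $1, $15}'   # prints: 3gram.kn022.gz 81.2
```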
+
+#This will link the lowest-perplexity LM as the output LM:
+#ln -sf $tgtdir/$(head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' ') $outlm
+
+#A slight modification of the previous approach:
+#We look at the two lowest-perplexity LMs and use the 3gram LM if exactly one
+#of the two is a 3gram, even if the 4gram has lower perplexity.
+nof_trigram_lm=$(head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l)
+if [[ $nof_trigram_lm -eq 0 ]] ; then
+  lmfilename=$(head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' ')
+elif [[ $nof_trigram_lm -eq 2 ]] ; then
+  lmfilename=$(head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' ')
+else  #exactly one 3gram LM
+  lmfilename=$(head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' ')
+fi
+(cd $tgtdir; ln -sf $(basename $lmfilename) $outlm )
+
diff --git a/egs/spgispeech/s5/path.sh b/egs/spgispeech/s5/path.sh
new file mode 100755
index 00000000000..233e1dea4ea
--- /dev/null
+++ b/egs/spgispeech/s5/path.sh
@@ -0,0 +1,9 @@
+export KALDI_ROOT=$PWD/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+. $KALDI_ROOT/tools/env.sh
+
+export LC_ALL=C
+
+
diff --git a/egs/spgispeech/s5/run.sh b/egs/spgispeech/s5/run.sh
new file mode 100755
index 00000000000..fe493b8d337
--- /dev/null
+++ b/egs/spgispeech/s5/run.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+
+CORPUS=/pool/corpora/spgispeech/
+
+stage=1
+
+. ./cmd.sh
+. ./path.sh
+. parse_options.sh
+
+set -e -o pipefail
+
+if [ $stage -le 1 ] ; then
+  lhotse prepare spgispeech $CORPUS ./
+fi
+
+if [ $stage -le 2 ] ; then
+  lhotse kaldi export spgispeech_recordings_val.jsonl.gz spgispeech_supervisions_val.jsonl.gz data/val
+  utils/fix_data_dir.sh data/val
+  utils/validate_data_dir.sh --no-feats data/val
+  lhotse kaldi export spgispeech_recordings_train.jsonl.gz spgispeech_supervisions_train.jsonl.gz data/train
+  utils/fix_data_dir.sh data/train
+  utils/validate_data_dir.sh --no-feats data/train
+fi
+
+mfccdir=./mfcc
+if [ $stage -le 3 ] ; then
+  for part in val train ; do
+    steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/$part exp/make_mfcc/$part $mfccdir
+    utils/fix_data_dir.sh data/$part
+    steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir
+    utils/fix_data_dir.sh data/$part
+    utils/validate_data_dir.sh data/$part
+  done
+fi
+
+mkdir -p data/local/dict
+if [ $stage -le 4 ] ; then
+  cut -d ' ' -f 2- data/train/text | sed 's/ /\n/g' | sort -u | grep -v '[0-9]' > data/local/wordlist.txt
+  wget -O data/local/cmudict-0.7a http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7a
+  local/g2p/train_g2p.sh --cmd "$train_cmd" data/local/ data/local/g2p
+  local/prepare_dict.sh --cmd "$train_cmd" data/local/wordlist.txt data/local/g2p data/local/dict
+
+fi
+
+if [ $stage -le 5 ] ; then
+  utils/prepare_lang.sh data/local/dict \
+    "<UNK>" data/local/lang_tmp data/lang
+fi
+
+if [ $stage -le 6 ] ; then
+  local/train_lms_srilm.sh --train-text data/train/text data data/srilm
+  utils/format_lm.sh data/lang data/srilm/3gram.me.gz data/local/dict/lexicon.txt data/lang_test
+  utils/build_const_arpa_lm.sh data/srilm/4gram.me.gz data/lang_test data/lang_test
+fi
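The subsets carved out in the next stage use utils/subset_data_dir.sh. A toy approximation of what its --shortest mode does, assuming a standard utt2dur file (the real script also filters text, wav.scp, utt2spk, etc. and validates the result):

```bash
# Keep the 2000 shortest utterances: short utterances are the easiest to
# align from a flat start, which is why the monophone stage uses them.
sort -n -k 2 data/train/utt2dur | head -n 2000 | awk '{print $1}' > shortest_2k.uttlist
```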
+
+if [ $stage -le 7 ]; then
+  # Make some small data subsets for early system-build stages.
+  # For the monophone stages we select the shortest utterances, which should make it
+  # easier to align the data from a flat start.
+
+  utils/subset_data_dir.sh --shortest data/train 2000 data/train_2kshort
+  utils/subset_data_dir.sh data/train 5000 data/train_5k
+  utils/subset_data_dir.sh data/train 10000 data/train_10k
+fi
+
+if [ $stage -le 8 ]; then
+  # train a monophone system
+  steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \
+    data/train_2kshort data/lang exp/mono
+fi
+
+if [ $stage -le 9 ]; then
+  steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
+    data/train_5k data/lang exp/mono exp/mono_ali_5k
+
+  # train a first delta + delta-delta triphone system on a subset of 5000 utterances
+  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
+    2000 10000 data/train_5k data/lang exp/mono_ali_5k exp/tri1
+fi
+
+if [ $stage -le 10 ]; then
+  steps/align_si.sh --nj 10 --cmd "$train_cmd" \
+    data/train_10k data/lang exp/tri1 exp/tri1_ali_10k
+
+
+  # train an LDA+MLLT system.
+  steps/train_lda_mllt.sh --cmd "$train_cmd" \
+    --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
+    data/train_10k data/lang exp/tri1_ali_10k exp/tri2b
+fi
+
+if [ $stage -le 11 ]; then
+  # Align a 10k utts subset using the tri2b model
+  steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
+    data/train_10k data/lang exp/tri2b exp/tri2b_ali_10k
+
+  # Train tri3b, which is LDA+MLLT+SAT on 10k utts
+  steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
+    data/train_10k data/lang exp/tri2b_ali_10k exp/tri3b
+
+fi
+
+if [ $stage -le 12 ]; then
+  # align the entire training set using the tri3b model
+  steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
+    data/train data/lang \
+    exp/tri3b exp/tri3b_ali
+
+  # train another LDA+MLLT+SAT system on the full training set
+  steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \
+    data/train data/lang \
+    exp/tri3b_ali exp/tri4b
+fi
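Stage 13 below, like tri3b and tri4b before it, is speaker adapted training. For orientation, the fMLLR transform that SAT systems estimate per speaker $s$ is the standard constrained-MLLR objective (stated here for reference; it is not part of this patch):

\[
\hat{\mathbf{x}}_t = \mathbf{A}^{(s)}\mathbf{x}_t + \mathbf{b}^{(s)}, \qquad
(\mathbf{A}^{(s)}, \mathbf{b}^{(s)}) = \arg\max_{\mathbf{A},\mathbf{b}} \sum_t \Big( \log\lvert\det\mathbf{A}\rvert + \log\mathcal{N}\big(\mathbf{A}\mathbf{x}_t+\mathbf{b};\; \boldsymbol{\mu}_{m_t}, \boldsymbol{\Sigma}_{m_t}\big) \Big)
\]

where $m_t$ is the Gaussian that frame $\mathbf{x}_t$ is aligned to.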
+
+if [ $stage -le 13 ]; then
+  # align the full training set using the tri4b model
+  steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
+    data/train data/lang exp/tri4b exp/tri4b_ali
+
+  # train a larger SAT model on the full training set
+  steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
+    data/train data/lang exp/tri4b_ali exp/tri5b
+fi
+
+## You can continue with:
+#./local/run_cleanup_segmentation.sh
+#./local/chain/run_tdnn.sh
+#./local/chain/run_tdnn_lstm.sh --stage 14
+#
diff --git a/egs/spgispeech/s5/steps b/egs/spgispeech/s5/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/spgispeech/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/spgispeech/s5/utils b/egs/spgispeech/s5/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/spgispeech/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file

From dd97e1ba35aa016141627c155adadce6edce38b6 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Tue, 16 Aug 2022 03:14:09 -0400
Subject: [PATCH 03/18] Replace TravisCI with github actions (#4776)

* Github action for kaldi

* add ccache

* remove spaces
---
 .github/workflows/c-cpp.yml | 38 +++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 .github/workflows/c-cpp.yml

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
new file mode 100644
index 00000000000..c1f923cf58a
--- /dev/null
+++ b/.github/workflows/c-cpp.yml
@@ -0,0 +1,38 @@
+name: C/C++ CI
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    env:
+      CCACHE_DIR: /home/runner/work/kaldi/kaldi/.ccache
+      CXX: "ccache g++"
+      CC: "ccache gcc"
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Install sox
+      run: sudo apt-get install -y sox intel-mkl
+    - name: ccache
+      uses: hendrikmuhs/ccache-action@v1.2
+      with:
+        verbose: 1
+        max-size: 3G
+    - name: make tools
+      run: cd tools && make -j3
+    - name: ccache stats
+      run: ccache -s
+    - name: configure
+      run: cd src && ./configure --shared
+    - name: make depend
+      run: cd src && make clean && make depend
+    - name: make
+      run: cd src && make -j 3
+    - name: make test
+      run: cd src && make test

From e786efa39e40777944aa88dbf181b818f79152c2 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Tue, 16 Aug 2022 09:18:49 -0400
Subject: [PATCH 04/18] Windows conda fixes (#4777)

* Fix missing cblas and lapack external symbols for netlib

* Remaining Conda-forge changes

* Fix indent

* usleep using C++11 constructs

* make codefactor happy

Co-authored-by: Michael McAuliffe
---
 CMakeLists.txt                                | 25 +++++++++++++------
 cmake/gen_cmake_skeleton.py                   | 14 +++++------
 cmake/third_party/openfst.cmake               |  1 +
 ...hed-threaded-nnet3-cuda-online-pipeline.cc | 10 ++++++++
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ff24df4340d..886af19c693 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,7 +87,7 @@ if(CONDA_ROOT)
     # https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/kaldi-blas.h#L95-L113
     add_definitions(-DHAVE_OPENBLAS=1)
     if(MSVC)
-        include_directories($ENV{LIBRARY_INC}/)
+        link_libraries(cblas lapack)
         # necessary macros to compile on windows, from here:
         # https://icl.cs.utk.edu/lapack-for-windows/lapack/
         add_definitions(-DADD_)
@@ -146,6 +146,17 @@ if(MSVC)
     # some warnings related with fst
     add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305)

+    set(CompilerFlags
+        CMAKE_CXX_FLAGS
+        CMAKE_CXX_FLAGS_DEBUG
+        CMAKE_CXX_FLAGS_RELEASE
+        CMAKE_C_FLAGS
+        CMAKE_C_FLAGS_DEBUG
+        CMAKE_C_FLAGS_RELEASE
+    )
+    foreach(CompilerFlag ${CompilerFlags})
+        string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
+    endforeach()
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "") if(NOT DEFINED ENV{CUDAHOSTCXX}) set(ENV{CUDAHOSTCXX} ${CMAKE_CXX_COMPILER}) @@ -159,23 +170,21 @@ find_package(CUDAToolkit) find_package(CUDA) if(CUDA_FOUND) set(CUDA_PROPAGATE_HOST_FLAGS ON) - set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}") if(MSVC) + set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread") # Fixes incompatibility with cxx14 and cxx17 for Kaldi vs cuda in VS2019 list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /permissive-,/FS,/wd4819,/EHsc,/bigobj") list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /wd4018,/wd4244,/wd4267,/wd4291,/wd4305") - if(BUILD_SHARED_LIBS) - list(APPEND CUDA_NVCC_FLAGS_RELEASE -Xcompiler /MD) - list(APPEND CUDA_NVCC_FLAGS_DEBUG -Xcompiler /MDd) - endif() + list(APPEND CUDA_NVCC_FLAGS_RELEASE -Xcompiler /MD) # Kaldi will always be dynamically linked to Cuda + list(APPEND CUDA_NVCC_FLAGS_DEBUG -Xcompiler /MDd) else() # list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -std=c++${CMAKE_CXX_STANDARD}") list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC") + set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}") endif() set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS}) add_definitions(-DHAVE_CUDA=1) add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM=1) - include_directories(${CUDA_INCLUDE_DIRS}) link_libraries( ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} @@ -244,8 +253,8 @@ add_subdirectory(src/chain) add_subdirectory(src/ivector) if(NOT MSVC) add_subdirectory(src/online) - add_subdirectory(src/online2) endif() +add_subdirectory(src/online2) add_subdirectory(src/kws) add_subdirectory(src/itf) diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py index fa9c5159938..81c4be88e91 100644 --- a/cmake/gen_cmake_skeleton.py +++ b/cmake/gen_cmake_skeleton.py @@ -143,10 +143,10 @@ def get_exe_additional_depends(t): "generate-proxy-keywords": ["fstext"], "transcripts-to-fsts": ["fstext"], } - l = [] + libs = [] for pattern in additional.keys(): if fnmatch.fnmatch(t, pattern): - l.extend(list(map(lambda name: lib_dir_name_to_lib_target(name), additional[pattern]))) + libs.extend(list(map(lambda name: lib_dir_name_to_lib_target(name), additional[pattern]))) return sorted(list(set(l))) def disable_for_win32(t): @@ -194,7 +194,7 @@ def gen_code(self): ret.append(""" install(TARGETS {tgt} EXPORT kaldi-targets) -install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir}) +install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir} COMPONENT kaldi) """.format(tgt=self.target_name, dir=self.dir_name)) return "\n".join(ret) @@ -233,8 +233,8 @@ def load_dependency_from_makefile(self, filename): return libs = makefile.split("ADDLIBS")[-1].split("\n\n")[0] libs = re.findall("[^\s\\\\=]+", libs) - for l in libs: - self.depends.append(os.path.splitext(os.path.basename(l))[0]) + for lib in libs: + self.depends.append(os.path.splitext(os.path.basename(lib))[0]) def gen_code(self): ret = [] @@ -249,7 +249,7 @@ def gen_code(self): self.source_list.append("${CUDA_OBJS}") ret.append("if(CUDA_FOUND)") ret.append(" cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") - ret.append(" cuda_compile(CUDA_OBJS") + ret.append(" cuda_compile(CUDA_OBJS SHARED") for f in self.cuda_source_list: ret.append(" " + f) ret.append(" )") @@ -296,7 +296,7 @@ def get_test_exe_name(filename): LIBRARY DESTINATION ${{CMAKE_INSTALL_LIBDIR}} COMPONENT kaldi RUNTIME DESTINATION ${{CMAKE_INSTALL_BINDIR}} COMPONENT kaldi ) -install(FILES 
${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir})
+install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir} COMPONENT kaldi)
 """.format(tgt=self.target_name, dir=self.dir_name))
         return "\n".join(ret)
diff --git a/cmake/third_party/openfst.cmake b/cmake/third_party/openfst.cmake
index 26d25baef00..f9fff8a5445 100644
--- a/cmake/third_party/openfst.cmake
+++ b/cmake/third_party/openfst.cmake
@@ -54,6 +54,7 @@ if(NOT openfst_POPULATED)
     )
     install(DIRECTORY ${openfst_SOURCE_DIR}/src/include/
             DESTINATION include/
+            COMPONENT kaldi
             FILES_MATCHING PATTERN "*.h")

     install(TARGETS fst
diff --git a/src/cudadecoder/batched-threaded-nnet3-cuda-online-pipeline.cc b/src/cudadecoder/batched-threaded-nnet3-cuda-online-pipeline.cc
index 65e2f152ec5..6e78d7212fd 100644
--- a/src/cudadecoder/batched-threaded-nnet3-cuda-online-pipeline.cc
+++ b/src/cudadecoder/batched-threaded-nnet3-cuda-online-pipeline.cc
@@ -26,12 +26,22 @@
 #include
 #include
 #include
+#include <chrono>

 #include "cudamatrix/cu-common.h"
 #include "feat/feature-window.h"
 #include "lat/lattice-functions.h"
 #include "nnet3/nnet-utils.h"

+// Necessary for supporting online2 library on Windows
+#ifdef _WIN32
+  #include <thread>
+
+  void usleep(__int64 usec)
+  {
+    std::this_thread::sleep_for(std::chrono::microseconds(usec));
+  }
+#endif

 namespace kaldi {
 namespace cuda_decoder {

From b8f30acc8134f55cc064781c00880f73a64f3c75 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Tue, 16 Aug 2022 10:01:14 -0400
Subject: [PATCH 05/18] use last segment of the CXX as the ccbin compiler for
 cuda (#4778)

---
 src/makefiles/cuda_64bit.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk
index 62c25213d0a..8785371b3e1 100644
--- a/src/makefiles/cuda_64bit.mk
+++ b/src/makefiles/cuda_64bit.mk
@@ -9,7 +9,7 @@ CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include -fPIC -pthread -isystem $(OPENFST
 CUDA_INCLUDE= -I$(CUDATKDIR)/include -I$(CUBROOT) -I.. -isystem $(OPENFSTINC)
 CUDA_FLAGS = --compiler-options -fPIC --machine 64 -DHAVE_CUDA \
-             -ccbin $(CXX) -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
+             -ccbin $(lastword $(CXX)) -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
              -std=c++14 -DCUDA_API_PER_THREAD_DEFAULT_STREAM -lineinfo \
              --verbose -Wno-deprecated-gpu-targets

From 6c86e03e56c43eb4102149c787d6ec0945aa46f2 Mon Sep 17 00:00:00 2001
From: Michael McAuliffe
Date: Tue, 16 Aug 2022 23:25:52 -0700
Subject: [PATCH 06/18] Fix typo in gen_cmake_skeleton (#4779)

---
 cmake/gen_cmake_skeleton.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py
index 81c4be88e91..5925c6369a8 100644
--- a/cmake/gen_cmake_skeleton.py
+++ b/cmake/gen_cmake_skeleton.py
@@ -147,7 +147,7 @@ def get_exe_additional_depends(t):
     for pattern in additional.keys():
         if fnmatch.fnmatch(t, pattern):
             libs.extend(list(map(lambda name: lib_dir_name_to_lib_target(name), additional[pattern])))
-    return sorted(list(set(l)))
+    return sorted(list(set(libs)))

 def disable_for_win32(t):
     disabled = [

From ae2efea0ff01a8b1ecfe01b70160649e86a64083 Mon Sep 17 00:00:00 2001
From: Sourya Kakarla
Date: Wed, 17 Aug 2022 02:40:03 -0400
Subject: [PATCH 07/18] [egs] Set PYTHONUNBUFFERED=1 in all recipes (#4770)

* Set PYTHONUNBUFFERED=TRUE for wsj,tedlium

To force stdout and stderr to be unbuffered for python scripts.
Without this setting, parts of the stream might be lost in a few cases
(a minimal repro sketch follows below). Relevant mainly for Python
versions <3.7.
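A minimal repro of the buffering problem, assuming python3 and a non-tty pipe: when stdout is a pipe, CPython block-buffers it, so writes sit in the buffer until it fills or the interpreter flushes on a clean exit.

```bash
# The buffered line is lost if the interpreter dies before flushing:
python3 -c 'import os, sys; sys.stdout.write("you will not see this\n"); os._exit(1)' | cat
# With PYTHONUNBUFFERED=1 the write reaches the pipe immediately:
PYTHONUNBUFFERED=1 python3 -c 'import os, sys; sys.stdout.write("now you do\n"); os._exit(1)' | cat
```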
* Set PYTHONUNBUFFERED=TRUE for all recipes Add to path.sh in all except the already updated wsj, tedlium. To force stdout, stderr to be unbuffered for python scripts. Relevant mainly for python versions <3.7. * Set PYTHONUNBUFFERED=TRUE in remaining path.sh These files were missed by mistake in earlier commits. * Use 1 instead of TRUE for PYTHONUNBUFFERED --- egs/aidatatang_200zh/s5/path.sh | 1 + egs/aishell/s5/path.sh | 1 + egs/aishell/v1/path.sh | 1 + egs/aishell2/s5/path.sh | 1 + egs/ami/s5/path.sh | 1 + egs/ami/s5b/path.sh | 1 + egs/ami/s5c/path.sh | 1 + egs/an4/s5/path.sh | 1 + egs/apiai_decode/s5/path.sh | 1 + egs/aspire/s5/path.sh | 1 + egs/aurora4/s5/path.sh | 1 + egs/babel/s5/path.sh | 1 + egs/babel/s5b/path.sh | 1 + egs/babel/s5c/path.sh | 1 + egs/babel/s5d/path.sh | 1 + egs/babel_multilang/s5/path.sh | 1 + egs/bentham/v1/path.sh | 1 + egs/bn_music_speech/v1/path.sh | 1 + egs/callhome_diarization/v1/path.sh | 1 + egs/callhome_diarization/v2/path.sh | 1 + egs/callhome_egyptian/s5/path.sh | 1 + egs/casia_hwdb/v1/path.sh | 1 + egs/chime1/s5/path.sh | 1 + egs/chime2/s5/path.sh | 1 + egs/chime3/s5/path.sh | 1 + egs/chime4/s5_1ch/path.sh | 1 + egs/chime4/s5_2ch/path.sh | 1 + egs/chime4/s5_6ch/path.sh | 1 + egs/chime5/s5/path.sh | 1 + egs/chime5/s5b/path.sh | 1 + egs/chime6/s5_track1/path.sh | 1 + egs/chime6/s5_track2/path.sh | 1 + egs/chime6/s5b_track1/path.sh | 1 + egs/chime6/s5b_track2/path.sh | 1 + egs/chime6/s5c_track2/path.sh | 1 + egs/cifar/v1/path.sh | 1 + egs/cl_english/v1/path.sh | 1 + egs/cmu_cslu_kids/s5/path.sh | 1 + egs/cnceleb/v1/path.sh | 1 + egs/cnceleb/v2/path.sh | 1 + egs/commonvoice/s5/path.sh | 1 + egs/csj/s5/path.sh | 1 + egs/dihard_2018/v1/path.sh | 1 + egs/dihard_2018/v2/path.sh | 1 + egs/fame/s5/path.sh | 1 + egs/fame/v1/path.sh | 1 + egs/fame/v2/path.sh | 1 + egs/farsdat/s5/path.sh | 1 + egs/fisher_callhome_spanish/s5/path.sh | 1 + egs/fisher_english/s5/path.sh | 1 + egs/fisher_swbd/s5/path.sh | 1 + egs/formosa/s5/path.sh | 1 + egs/gale_arabic/s5/path.sh | 1 + egs/gale_arabic/s5b/path.sh | 1 + egs/gale_arabic/s5c/path.sh | 1 + egs/gale_arabic/s5d/path.sh | 1 + egs/gale_mandarin/s5/path.sh | 1 + egs/gigaspeech/s5/path.sh | 1 + egs/gop_speechocean762/s5/path.sh | 1 + egs/gp/s1/path.sh | 1 + egs/gp/s5/path.sh | 1 + egs/heroico/s5/path.sh | 1 + egs/hi_mia/v1/path.sh | 1 + egs/hi_mia/w1/path.sh | 1 + egs/hkust/s5/path.sh | 1 + egs/hub4_english/s5/path.sh | 1 + egs/hub4_spanish/s5/path.sh | 1 + egs/iam/v1/path.sh | 1 + egs/iam/v2/path.sh | 1 + egs/iban/s5/path.sh | 1 + egs/icsi/s5/path.sh | 1 + egs/ifnenit/v1/path.sh | 1 + egs/libri_css/s5_css/path.sh | 1 + egs/libri_css/s5_mono/path.sh | 1 + egs/librispeech/s5/fairseq_ltlm/path.sh | 1 + egs/librispeech/s5/path.sh | 1 + egs/lre/v1/path.sh | 1 + egs/lre07/v1/path.sh | 1 + egs/lre07/v2/path.sh | 1 + egs/madcat_ar/v1/path.sh | 1 + egs/madcat_zh/v1/path.sh | 1 + egs/malach/s5/path.sh | 1 + egs/mandarin_bn_bc/s5/path.sh | 1 + egs/material/s5/path.sh | 1 + egs/mgb2_arabic/s5/path.sh | 1 + egs/mgb5/s5/path.sh | 1 + egs/mini_librispeech/s5/path.sh | 1 + egs/mobvoi/v1/path.sh | 1 + egs/mobvoihotwords/v1/path.sh | 1 + egs/multi_cn/s5/path.sh | 1 + egs/multi_en/s5/path.sh | 1 + egs/nsc/s5/path.sh | 1 + egs/opensat20/s5/path.sh | 1 + egs/ptb/s5/path.sh | 1 + egs/reverb/s5/path.sh | 1 + egs/rimes/v1/path.sh | 1 + egs/rm/s5/path.sh | 1 + egs/sad_rats/s5/path.sh | 1 + egs/sitw/v1/path.sh | 1 + egs/sitw/v2/path.sh | 1 + egs/snips/v1/path.sh | 1 + egs/spanish_dimex100/s5/path.sh | 1 + egs/sprakbanken/s5/path.sh | 1 + 
egs/sprakbanken_swe/s5/path.sh | 1 + egs/sre08/v1/path.sh | 1 + egs/sre10/v1/path.sh | 1 + egs/sre10/v2/path.sh | 1 + egs/sre16/v1/path.sh | 1 + egs/sre16/v2/path.sh | 1 + egs/svhn/v1/path.sh | 1 + egs/swahili/s5/path.sh | 1 + egs/swbd/s5/path.sh | 1 + egs/swbd/s5b/path.sh | 1 + egs/swbd/s5c/path.sh | 1 + egs/tedlium/s5/path.sh | 1 + egs/tedlium/s5_r2/path.sh | 1 + egs/tedlium/s5_r2_wsj/path.sh | 1 + egs/tedlium/s5_r3/path.sh | 1 + egs/thchs30/s5/path.sh | 1 + egs/tidigits/s5/path.sh | 1 + egs/timit/s5/path.sh | 1 + egs/tunisian_msa/s5/path.sh | 1 + egs/uw3/v1/path.sh | 1 + egs/voxceleb/v1/path.sh | 1 + egs/voxceleb/v2/path.sh | 1 + egs/voxforge/s5/path.sh | 1 + egs/vystadial_cz/online_demo/path.sh | 1 + egs/vystadial_cz/s5/path.sh | 1 + egs/vystadial_cz/s5b/path.sh | 1 + egs/vystadial_en/s5/path.sh | 1 + egs/wenetspeech/s5/path.sh | 1 + egs/wsj/s5/path.sh | 1 + egs/yesno/s5/path.sh | 1 + egs/yomdle_fa/v1/path.sh | 1 + egs/yomdle_korean/v1/path.sh | 1 + egs/yomdle_russian/v1/path.sh | 1 + egs/yomdle_tamil/v1/path.sh | 1 + egs/yomdle_zh/v1/path.sh | 1 + egs/zeroth_korean/s5/path.sh | 1 + 139 files changed, 139 insertions(+) diff --git a/egs/aidatatang_200zh/s5/path.sh b/egs/aidatatang_200zh/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100644 --- a/egs/aidatatang_200zh/s5/path.sh +++ b/egs/aidatatang_200zh/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/aishell/s5/path.sh b/egs/aishell/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/aishell/s5/path.sh +++ b/egs/aishell/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/aishell/v1/path.sh b/egs/aishell/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/aishell/v1/path.sh +++ b/egs/aishell/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/aishell2/s5/path.sh b/egs/aishell2/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/aishell2/s5/path.sh +++ b/egs/aishell2/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/ami/s5/path.sh b/egs/ami/s5/path.sh index b89788b7c73..c940a4d5f0b 100644 --- a/egs/ami/s5/path.sh +++ b/egs/ami/s5/path.sh @@ -11,4 +11,5 @@ SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64 BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt export PATH=$PATH:$LMBIN:$BEAMFORMIT:$SRILM +export PYTHONUNBUFFERED=1 diff --git a/egs/ami/s5b/path.sh b/egs/ami/s5b/path.sh index ad2c93b309b..d16860dde42 100644 --- a/egs/ami/s5b/path.sh +++ b/egs/ami/s5b/path.sh @@ -10,4 +10,5 @@ SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64 BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt export PATH=$PATH:$LMBIN:$BEAMFORMIT:$SRILM +export PYTHONUNBUFFERED=1 diff --git a/egs/ami/s5c/path.sh b/egs/ami/s5c/path.sh index ae38e737c1f..79ea30b200a 100755 --- a/egs/ami/s5c/path.sh +++ b/egs/ami/s5c/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/an4/s5/path.sh b/egs/an4/s5/path.sh index 1bea0e69779..a7c6294a65e 100755 --- a/egs/an4/s5/path.sh +++ b/egs/an4/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/apiai_decode/s5/path.sh b/egs/apiai_decode/s5/path.sh index 8b177b18ab2..999155d9677 100755 --- a/egs/apiai_decode/s5/path.sh +++ b/egs/apiai_decode/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/aspire/s5/path.sh b/egs/aspire/s5/path.sh index 7fb6d91c543..39889776ef7 100755 --- a/egs/aspire/s5/path.sh +++ b/egs/aspire/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . $KALDI_ROOT/tools/config/common_path.sh export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/aurora4/s5/path.sh b/egs/aurora4/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/aurora4/s5/path.sh +++ b/egs/aurora4/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/babel/s5/path.sh b/egs/babel/s5/path.sh index a45a39d1f6a..11ef300c80f 100755 --- a/egs/babel/s5/path.sh +++ b/egs/babel/s5/path.sh @@ -3,3 +3,4 @@ export KALDI_ROOT=`pwd`/../../.. . 
/export/babel/data/software/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/babel/s5b/path.sh b/egs/babel/s5b/path.sh index 2d7dba09015..a6cd089a0a4 100755 --- a/egs/babel/s5b/path.sh +++ b/egs/babel/s5b/path.sh @@ -2,3 +2,4 @@ export KALDI_ROOT=`pwd`/../../.. . /export/babel/data/software/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/babel/s5c/path.sh b/egs/babel/s5c/path.sh index 2d7dba09015..a6cd089a0a4 100755 --- a/egs/babel/s5c/path.sh +++ b/egs/babel/s5c/path.sh @@ -2,3 +2,4 @@ export KALDI_ROOT=`pwd`/../../.. . /export/babel/data/software/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/babel/s5d/path.sh b/egs/babel/s5d/path.sh index 800a6bbf9ba..6147a025341 100755 --- a/egs/babel/s5d/path.sh +++ b/egs/babel/s5d/path.sh @@ -10,3 +10,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/babel_multilang/s5/path.sh b/egs/babel_multilang/s5/path.sh index 1bea0e69779..a7c6294a65e 100755 --- a/egs/babel_multilang/s5/path.sh +++ b/egs/babel_multilang/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/bentham/v1/path.sh b/egs/bentham/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/bentham/v1/path.sh +++ b/egs/bentham/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/bn_music_speech/v1/path.sh b/egs/bn_music_speech/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/bn_music_speech/v1/path.sh +++ b/egs/bn_music_speech/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/callhome_diarization/v1/path.sh b/egs/callhome_diarization/v1/path.sh index 851c14e27c3..3c2db0dc017 100755 --- a/egs/callhome_diarization/v1/path.sh +++ b/egs/callhome_diarization/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/callhome_diarization/v2/path.sh b/egs/callhome_diarization/v2/path.sh index 851c14e27c3..3c2db0dc017 100755 --- a/egs/callhome_diarization/v2/path.sh +++ b/egs/callhome_diarization/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/callhome_egyptian/s5/path.sh b/egs/callhome_egyptian/s5/path.sh index 1a6fb5f891b..80c74d959dc 100755 --- a/egs/callhome_egyptian/s5/path.sh +++ b/egs/callhome_egyptian/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/casia_hwdb/v1/path.sh b/egs/casia_hwdb/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100644 --- a/egs/casia_hwdb/v1/path.sh +++ b/egs/casia_hwdb/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime1/s5/path.sh b/egs/chime1/s5/path.sh index 1a6fb5f891b..80c74d959dc 100755 --- a/egs/chime1/s5/path.sh +++ b/egs/chime1/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime2/s5/path.sh b/egs/chime2/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/chime2/s5/path.sh +++ b/egs/chime2/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime3/s5/path.sh b/egs/chime3/s5/path.sh index a4772b7d89d..7048fb3b11c 100755 --- a/egs/chime3/s5/path.sh +++ b/egs/chime3/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/b [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime4/s5_1ch/path.sh b/egs/chime4/s5_1ch/path.sh index e013af13469..b521c7d42ff 100755 --- a/egs/chime4/s5_1ch/path.sh +++ b/egs/chime4/s5_1ch/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime4/s5_2ch/path.sh b/egs/chime4/s5_2ch/path.sh index e013af13469..b521c7d42ff 100755 --- a/egs/chime4/s5_2ch/path.sh +++ b/egs/chime4/s5_2ch/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime4/s5_6ch/path.sh b/egs/chime4/s5_6ch/path.sh index e013af13469..b521c7d42ff 100755 --- a/egs/chime4/s5_6ch/path.sh +++ b/egs/chime4/s5_6ch/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime5/s5/path.sh b/egs/chime5/s5/path.sh index fb1c0489386..6bc42f5ac1c 100644 --- a/egs/chime5/s5/path.sh +++ b/egs/chime5/s5/path.sh @@ -4,4 +4,5 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime5/s5b/path.sh b/egs/chime5/s5b/path.sh index fb1c0489386..6bc42f5ac1c 100644 --- a/egs/chime5/s5b/path.sh +++ b/egs/chime5/s5b/path.sh @@ -4,4 +4,5 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime6/s5_track1/path.sh b/egs/chime6/s5_track1/path.sh index fb1c0489386..6bc42f5ac1c 100644 --- a/egs/chime6/s5_track1/path.sh +++ b/egs/chime6/s5_track1/path.sh @@ -4,4 +4,5 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime6/s5_track2/path.sh b/egs/chime6/s5_track2/path.sh index 2f4e4e4fb21..10e135bbf6c 100644 --- a/egs/chime6/s5_track2/path.sh +++ b/egs/chime6/s5_track2/path.sh @@ -6,4 +6,5 @@ export PYTHONPATH="${PYTHONPATH}:$PWD/dscore" [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime6/s5b_track1/path.sh b/egs/chime6/s5b_track1/path.sh index fb1c0489386..6bc42f5ac1c 100644 --- a/egs/chime6/s5b_track1/path.sh +++ b/egs/chime6/s5b_track1/path.sh @@ -4,4 +4,5 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime6/s5b_track2/path.sh b/egs/chime6/s5b_track2/path.sh index 2f4e4e4fb21..10e135bbf6c 100644 --- a/egs/chime6/s5b_track2/path.sh +++ b/egs/chime6/s5b_track2/path.sh @@ -6,4 +6,5 @@ export PYTHONPATH="${PYTHONPATH}:$PWD/dscore" [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/chime6/s5c_track2/path.sh b/egs/chime6/s5c_track2/path.sh index 2f4e4e4fb21..10e135bbf6c 100644 --- a/egs/chime6/s5c_track2/path.sh +++ b/egs/chime6/s5c_track2/path.sh @@ -6,4 +6,5 @@ export PYTHONPATH="${PYTHONPATH}:$PWD/dscore" [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/cifar/v1/path.sh b/egs/cifar/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/cifar/v1/path.sh +++ b/egs/cifar/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/cl_english/v1/path.sh b/egs/cl_english/v1/path.sh index f75d631c628..836e74e9ceb 100755 --- a/egs/cl_english/v1/path.sh +++ b/egs/cl_english/v1/path.sh @@ -26,3 +26,4 @@ fi if [ -f $KALDI_ROOT/tools/env.sh ]; then . $KALDI_ROOT/tools/env.sh fi +export PYTHONUNBUFFERED=1 diff --git a/egs/cmu_cslu_kids/s5/path.sh b/egs/cmu_cslu_kids/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/cmu_cslu_kids/s5/path.sh +++ b/egs/cmu_cslu_kids/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/cnceleb/v1/path.sh b/egs/cnceleb/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/cnceleb/v1/path.sh +++ b/egs/cnceleb/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/cnceleb/v2/path.sh b/egs/cnceleb/v2/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/cnceleb/v2/path.sh +++ b/egs/cnceleb/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/commonvoice/s5/path.sh b/egs/commonvoice/s5/path.sh index 705600ad47a..28587970a43 100644 --- a/egs/commonvoice/s5/path.sh +++ b/egs/commonvoice/s5/path.sh @@ -3,6 +3,7 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 # For now, don't include any of the optional dependenices of the main # librispeech recipe diff --git a/egs/csj/s5/path.sh b/egs/csj/s5/path.sh index 96343eaf36b..497f5647233 100755 --- a/egs/csj/s5/path.sh +++ b/egs/csj/s5/path.sh @@ -10,3 +10,4 @@ export PATH=$PATH:/usr/local/cuda/bin export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:/usr/local/cuda/bin/nvcc export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/dihard_2018/v1/path.sh b/egs/dihard_2018/v1/path.sh index 851c14e27c3..3c2db0dc017 100755 --- a/egs/dihard_2018/v1/path.sh +++ b/egs/dihard_2018/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/dihard_2018/v2/path.sh b/egs/dihard_2018/v2/path.sh index 851c14e27c3..3c2db0dc017 100755 --- a/egs/dihard_2018/v2/path.sh +++ b/egs/dihard_2018/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/fame/s5/path.sh b/egs/fame/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/fame/s5/path.sh +++ b/egs/fame/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/fame/v1/path.sh b/egs/fame/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/fame/v1/path.sh +++ b/egs/fame/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/fame/v2/path.sh b/egs/fame/v2/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/fame/v2/path.sh +++ b/egs/fame/v2/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/farsdat/s5/path.sh b/egs/farsdat/s5/path.sh index 62794699b41..1ceead26ef7 100755 --- a/egs/farsdat/s5/path.sh +++ b/egs/farsdat/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/b [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/fisher_callhome_spanish/s5/path.sh b/egs/fisher_callhome_spanish/s5/path.sh index 17ffb0369f8..80bc0ec3d77 100755 --- a/egs/fisher_callhome_spanish/s5/path.sh +++ b/egs/fisher_callhome_spanish/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dpovey/libs +export PYTHONUNBUFFERED=1 diff --git a/egs/fisher_english/s5/path.sh b/egs/fisher_english/s5/path.sh index 1a6fb5f891b..80c74d959dc 100755 --- a/egs/fisher_english/s5/path.sh +++ b/egs/fisher_english/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/fisher_swbd/s5/path.sh b/egs/fisher_swbd/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/fisher_swbd/s5/path.sh +++ b/egs/fisher_swbd/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/formosa/s5/path.sh b/egs/formosa/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/formosa/s5/path.sh +++ b/egs/formosa/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/gale_arabic/s5/path.sh b/egs/gale_arabic/s5/path.sh index be11b34cbc6..0be3b11acbd 100755 --- a/egs/gale_arabic/s5/path.sh +++ b/egs/gale_arabic/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/gale_arabic/s5b/path.sh b/egs/gale_arabic/s5b/path.sh index be11b34cbc6..0be3b11acbd 100755 --- a/egs/gale_arabic/s5b/path.sh +++ b/egs/gale_arabic/s5b/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/gale_arabic/s5c/path.sh b/egs/gale_arabic/s5c/path.sh index be11b34cbc6..0be3b11acbd 100755 --- a/egs/gale_arabic/s5c/path.sh +++ b/egs/gale_arabic/s5c/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/gale_arabic/s5d/path.sh b/egs/gale_arabic/s5d/path.sh index be11b34cbc6..0be3b11acbd 100755 --- a/egs/gale_arabic/s5d/path.sh +++ b/egs/gale_arabic/s5d/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/gale_mandarin/s5/path.sh b/egs/gale_mandarin/s5/path.sh index e875e4b585c..1afc1073224 100755 --- a/egs/gale_mandarin/s5/path.sh +++ b/egs/gale_mandarin/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/kaldi_lm . $KALDI_ROOT/tools/config/common_path.sh . $KALDI_ROOT/tools/env.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/gigaspeech/s5/path.sh b/egs/gigaspeech/s5/path.sh index e01cd6e8b7a..9ae2ddc1f96 100755 --- a/egs/gigaspeech/s5/path.sh +++ b/egs/gigaspeech/s5/path.sh @@ -12,3 +12,4 @@ PYTHON='python2.7' sequitur=$KALDI_ROOT/tools/sequitur-g2p/g2p.py sequitur_path="$(dirname $sequitur)/lib/$PYTHON/site-packages" export PATH=$PATH:$(dirname $sequitur):$sequitur_path +export PYTHONUNBUFFERED=1 diff --git a/egs/gop_speechocean762/s5/path.sh b/egs/gop_speechocean762/s5/path.sh index 026775e0466..64c811fb533 100755 --- a/egs/gop_speechocean762/s5/path.sh +++ b/egs/gop_speechocean762/s5/path.sh @@ -25,3 +25,4 @@ sequitur_path="$(dirname $sequitur)/lib/$PYTHON/site-packages" # Directory under which the LM training corpus should be extracted LM_CORPUS_ROOT=./lm-corpus +export PYTHONUNBUFFERED=1 diff --git a/egs/gp/s1/path.sh b/egs/gp/s1/path.sh index 8a3b9a84d98..b4a3b2b8cd3 100644 --- a/egs/gp/s1/path.sh +++ b/egs/gp/s1/path.sh @@ -31,6 +31,7 @@ TOOLS=$SHORTEN:$SOX export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS:$TOOLS export LC_ALL=C +export PYTHONUNBUFFERED=1 # Site-specific configs: [ `hostname -y` == ecdf ] && { . 
path_ed.sh; } diff --git a/egs/gp/s5/path.sh b/egs/gp/s5/path.sh index fcf365ec8b6..ad4112c59f5 100644 --- a/egs/gp/s5/path.sh +++ b/egs/gp/s5/path.sh @@ -24,6 +24,7 @@ export kaldi_steps=$PWD/steps SCRIPTS=$kaldi_local:$kaldi_utils:$kaldi_steps export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS +export PYTHONUNBUFFERED=1 # If the correct version of shorten and sox are not on the path, # the following will be set by local/gp_check_tools.sh diff --git a/egs/heroico/s5/path.sh b/egs/heroico/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/heroico/s5/path.sh +++ b/egs/heroico/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/hi_mia/v1/path.sh b/egs/hi_mia/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/hi_mia/v1/path.sh +++ b/egs/hi_mia/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/hi_mia/w1/path.sh b/egs/hi_mia/w1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/hi_mia/w1/path.sh +++ b/egs/hi_mia/w1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/hkust/s5/path.sh b/egs/hkust/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/hkust/s5/path.sh +++ b/egs/hkust/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/hub4_english/s5/path.sh b/egs/hub4_english/s5/path.sh index 49813fc4cd0..3f89f6f11a2 100755 --- a/egs/hub4_english/s5/path.sh +++ b/egs/hub4_english/s5/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe if [ -f $KALDI_ROOT/tools/env.sh ]; then . $KALDI_ROOT/tools/env.sh; fi export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/hub4_spanish/s5/path.sh b/egs/hub4_spanish/s5/path.sh index b7b4dd983fd..f31a4946504 100755 --- a/egs/hub4_spanish/s5/path.sh +++ b/egs/hub4_spanish/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/openfs [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/iam/v1/path.sh b/egs/iam/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/iam/v1/path.sh +++ b/egs/iam/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! 
-f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/iam/v2/path.sh b/egs/iam/v2/path.sh index 7e458144624..1d3c15c6e94 100755 --- a/egs/iam/v2/path.sh +++ b/egs/iam/v2/path.sh @@ -7,3 +7,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH export LD_LIBRARY_PATH=$KALDI_ROOT/tools/openfst/lib:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/home/dpovey/libs:$LD_LIBRARY_PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/iban/s5/path.sh b/egs/iban/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/iban/s5/path.sh +++ b/egs/iban/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/icsi/s5/path.sh b/egs/icsi/s5/path.sh index dcfbef0904b..0af9041475b 100644 --- a/egs/icsi/s5/path.sh +++ b/egs/icsi/s5/path.sh @@ -6,3 +6,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/ifnenit/v1/path.sh b/egs/ifnenit/v1/path.sh index 85a0ae8e0d0..ff786480de0 100755 --- a/egs/ifnenit/v1/path.sh +++ b/egs/ifnenit/v1/path.sh @@ -9,3 +9,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/libri_css/s5_css/path.sh b/egs/libri_css/s5_css/path.sh index 2f4e4e4fb21..10e135bbf6c 100644 --- a/egs/libri_css/s5_css/path.sh +++ b/egs/libri_css/s5_css/path.sh @@ -6,4 +6,5 @@ export PYTHONPATH="${PYTHONPATH}:$PWD/dscore" [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/libri_css/s5_mono/path.sh b/egs/libri_css/s5_mono/path.sh index ab1a81a86ef..a4cae6d8692 100644 --- a/egs/libri_css/s5_mono/path.sh +++ b/egs/libri_css/s5_mono/path.sh @@ -7,4 +7,5 @@ export PYTHONPATH="${PYTHONPATH}:$PWD/dscore" . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C export BASH_ENV="~/.aliases" +export PYTHONUNBUFFERED=1 diff --git a/egs/librispeech/s5/fairseq_ltlm/path.sh b/egs/librispeech/s5/fairseq_ltlm/path.sh index 653f78a9164..180247a7ee9 100644 --- a/egs/librispeech/s5/fairseq_ltlm/path.sh +++ b/egs/librispeech/s5/fairseq_ltlm/path.sh @@ -10,4 +10,5 @@ if [ -z $c ] ; then source fairseq_ltlm/anaconda/bin/activate set -- "${current_args[@]}" fi +export PYTHONUNBUFFERED=1 diff --git a/egs/librispeech/s5/path.sh b/egs/librispeech/s5/path.sh index 2ba2046833c..e28334ded91 100755 --- a/egs/librispeech/s5/path.sh +++ b/egs/librispeech/s5/path.sh @@ -25,3 +25,4 @@ sequitur_path="$(dirname $sequitur)/lib/$PYTHON/site-packages" # Directory under which the LM training corpus should be extracted LM_CORPUS_ROOT=./lm-corpus +export PYTHONUNBUFFERED=1 diff --git a/egs/lre/v1/path.sh b/egs/lre/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/lre/v1/path.sh +++ b/egs/lre/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/lre07/v1/path.sh b/egs/lre07/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/lre07/v1/path.sh +++ b/egs/lre07/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/lre07/v2/path.sh b/egs/lre07/v2/path.sh index 161fc200300..1c7e06b9dbc 100755 --- a/egs/lre07/v2/path.sh +++ b/egs/lre07/v2/path.sh @@ -1,3 +1,4 @@ export KALDI_ROOT=$(cd ../../..; pwd) export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/madcat_ar/v1/path.sh b/egs/madcat_ar/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/madcat_ar/v1/path.sh +++ b/egs/madcat_ar/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/madcat_zh/v1/path.sh b/egs/madcat_zh/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/madcat_zh/v1/path.sh +++ b/egs/madcat_zh/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/malach/s5/path.sh b/egs/malach/s5/path.sh index c1f5745adc8..3004989b033 100644 --- a/egs/malach/s5/path.sh +++ b/egs/malach/s5/path.sh @@ -9,6 +9,7 @@ LMBIN=$KALDI_ROOT/tools/irstlm/bin SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64 export PATH=$PATH:$LMBIN:$SRILM +export PYTHONUNBUFFERED=1 # The following was needed to enable Python 3 and also a version of # gcc consistent with the latest version of cuda on our system. You diff --git a/egs/mandarin_bn_bc/s5/path.sh b/egs/mandarin_bn_bc/s5/path.sh index e875e4b585c..1afc1073224 100644 --- a/egs/mandarin_bn_bc/s5/path.sh +++ b/egs/mandarin_bn_bc/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/kaldi_lm . $KALDI_ROOT/tools/config/common_path.sh . $KALDI_ROOT/tools/env.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/material/s5/path.sh b/egs/material/s5/path.sh index ffa108b6737..256941eaec7 100644 --- a/egs/material/s5/path.sh +++ b/egs/material/s5/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/env.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/env.sh is not present (this is uncommon but might be OK)" . $KALDI_ROOT/tools/env.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/mgb2_arabic/s5/path.sh b/egs/mgb2_arabic/s5/path.sh index be11b34cbc6..0be3b11acbd 100755 --- a/egs/mgb2_arabic/s5/path.sh +++ b/egs/mgb2_arabic/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/mgb5/s5/path.sh b/egs/mgb5/s5/path.sh index ebc3e1f4ee0..3350b2f45ed 100644 --- a/egs/mgb5/s5/path.sh +++ b/egs/mgb5/s5/path.sh @@ -5,4 +5,5 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C export CUDA_CACHE_DISABLE=1 +export PYTHONUNBUFFERED=1 diff --git a/egs/mini_librispeech/s5/path.sh b/egs/mini_librispeech/s5/path.sh index 34244b27f2e..5572521e9b0 100644 --- a/egs/mini_librispeech/s5/path.sh +++ b/egs/mini_librispeech/s5/path.sh @@ -4,6 +4,7 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 # For now, don't include any of the optional dependenices of the main # librispeech recipe diff --git a/egs/mobvoi/v1/path.sh b/egs/mobvoi/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/mobvoi/v1/path.sh +++ b/egs/mobvoi/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/mobvoihotwords/v1/path.sh b/egs/mobvoihotwords/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/mobvoihotwords/v1/path.sh +++ b/egs/mobvoihotwords/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/multi_cn/s5/path.sh b/egs/multi_cn/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/multi_cn/s5/path.sh +++ b/egs/multi_cn/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/multi_en/s5/path.sh b/egs/multi_en/s5/path.sh index b0df9fcc2cb..39ec1fcc7db 100644 --- a/egs/multi_en/s5/path.sh +++ b/egs/multi_en/s5/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/nsc/s5/path.sh b/egs/nsc/s5/path.sh index 2ba2046833c..e28334ded91 100755 --- a/egs/nsc/s5/path.sh +++ b/egs/nsc/s5/path.sh @@ -25,3 +25,4 @@ sequitur_path="$(dirname $sequitur)/lib/$PYTHON/site-packages" # Directory under which the LM training corpus should be extracted LM_CORPUS_ROOT=./lm-corpus +export PYTHONUNBUFFERED=1 diff --git a/egs/opensat20/s5/path.sh b/egs/opensat20/s5/path.sh index dcfbef0904b..0af9041475b 100644 --- a/egs/opensat20/s5/path.sh +++ b/egs/opensat20/s5/path.sh @@ -6,3 +6,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/ptb/s5/path.sh b/egs/ptb/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/ptb/s5/path.sh +++ b/egs/ptb/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/reverb/s5/path.sh b/egs/reverb/s5/path.sh index f46c5d8cb72..356ad0624e5 100644 --- a/egs/reverb/s5/path.sh +++ b/egs/reverb/s5/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/rimes/v1/path.sh b/egs/rimes/v1/path.sh index c7ebe7f2abf..44d02a8496e 100755 --- a/egs/rimes/v1/path.sh +++ b/egs/rimes/v1/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . 
$KALDI_ROOT/tools/config/common_path.sh export LD_LIBRARY_PATH=$KALDI_ROOT/tools/openfst/lib:$LD_LIBRARY_PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/rm/s5/path.sh b/egs/rm/s5/path.sh index 1a6fb5f891b..80c74d959dc 100755 --- a/egs/rm/s5/path.sh +++ b/egs/rm/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sad_rats/s5/path.sh b/egs/sad_rats/s5/path.sh index ae38e737c1f..79ea30b200a 100644 --- a/egs/sad_rats/s5/path.sh +++ b/egs/sad_rats/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sitw/v1/path.sh b/egs/sitw/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sitw/v1/path.sh +++ b/egs/sitw/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sitw/v2/path.sh b/egs/sitw/v2/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sitw/v2/path.sh +++ b/egs/sitw/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/snips/v1/path.sh b/egs/snips/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/snips/v1/path.sh +++ b/egs/snips/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/spanish_dimex100/s5/path.sh b/egs/spanish_dimex100/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/spanish_dimex100/s5/path.sh +++ b/egs/spanish_dimex100/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sprakbanken/s5/path.sh b/egs/sprakbanken/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/sprakbanken/s5/path.sh +++ b/egs/sprakbanken/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sprakbanken_swe/s5/path.sh b/egs/sprakbanken_swe/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/sprakbanken_swe/s5/path.sh +++ b/egs/sprakbanken_swe/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sre08/v1/path.sh b/egs/sre08/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sre08/v1/path.sh +++ b/egs/sre08/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sre10/v1/path.sh b/egs/sre10/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sre10/v1/path.sh +++ b/egs/sre10/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sre10/v2/path.sh b/egs/sre10/v2/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sre10/v2/path.sh +++ b/egs/sre10/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sre16/v1/path.sh b/egs/sre16/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sre16/v1/path.sh +++ b/egs/sre16/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/sre16/v2/path.sh b/egs/sre16/v2/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/sre16/v2/path.sh +++ b/egs/sre16/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/svhn/v1/path.sh b/egs/svhn/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/svhn/v1/path.sh +++ b/egs/svhn/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/swahili/s5/path.sh b/egs/swahili/s5/path.sh index b3c471a2b93..f485700f884 100755 --- a/egs/swahili/s5/path.sh +++ b/egs/swahili/s5/path.sh @@ -14,3 +14,4 @@ TRAIN_DIR="train" export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/swbd/s5/path.sh b/egs/swbd/s5/path.sh index 50eedcbb1f4..43ee8305fb7 100755 --- a/egs/swbd/s5/path.sh +++ b/egs/swbd/s5/path.sh @@ -6,3 +6,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C export MKL_NUM_THREADS=16 +export PYTHONUNBUFFERED=1 diff --git a/egs/swbd/s5b/path.sh b/egs/swbd/s5b/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/swbd/s5b/path.sh +++ b/egs/swbd/s5b/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/swbd/s5c/path.sh b/egs/swbd/s5c/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/swbd/s5c/path.sh +++ b/egs/swbd/s5c/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/tedlium/s5/path.sh b/egs/tedlium/s5/path.sh index 16d5314b9c2..1d12b4c37bb 100755 --- a/egs/tedlium/s5/path.sh +++ b/egs/tedlium/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/tedlium/s5_r2/path.sh b/egs/tedlium/s5_r2/path.sh index 16d5314b9c2..1d12b4c37bb 100755 --- a/egs/tedlium/s5_r2/path.sh +++ b/egs/tedlium/s5_r2/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/tedlium/s5_r2_wsj/path.sh b/egs/tedlium/s5_r2_wsj/path.sh index 92b43679edf..1ebca3f3d54 100755 --- a/egs/tedlium/s5_r2_wsj/path.sh +++ b/egs/tedlium/s5_r2_wsj/path.sh @@ -5,3 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too . $KALDI_ROOT/tools/config/common_path.sh export PATH=$PATH:/home/vmanoha1/kaldi-asr-diarization/src/segmenterbin export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/tedlium/s5_r3/path.sh b/egs/tedlium/s5_r3/path.sh index 16d5314b9c2..1d12b4c37bb 100755 --- a/egs/tedlium/s5_r3/path.sh +++ b/egs/tedlium/s5_r3/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/too [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/thchs30/s5/path.sh b/egs/thchs30/s5/path.sh index fb1c0489386..6bc42f5ac1c 100755 --- a/egs/thchs30/s5/path.sh +++ b/egs/thchs30/s5/path.sh @@ -4,4 +4,5 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/tidigits/s5/path.sh b/egs/tidigits/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/tidigits/s5/path.sh +++ b/egs/tidigits/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/timit/s5/path.sh b/egs/timit/s5/path.sh index 62794699b41..1ceead26ef7 100755 --- a/egs/timit/s5/path.sh +++ b/egs/timit/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/b [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/tunisian_msa/s5/path.sh b/egs/tunisian_msa/s5/path.sh index 705600ad47a..28587970a43 100644 --- a/egs/tunisian_msa/s5/path.sh +++ b/egs/tunisian_msa/s5/path.sh @@ -3,6 +3,7 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 # For now, don't include any of the optional dependenices of the main # librispeech recipe diff --git a/egs/uw3/v1/path.sh b/egs/uw3/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/uw3/v1/path.sh +++ b/egs/uw3/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/voxceleb/v1/path.sh b/egs/voxceleb/v1/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/voxceleb/v1/path.sh +++ b/egs/voxceleb/v1/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/voxceleb/v2/path.sh b/egs/voxceleb/v2/path.sh index e50f57c5271..e66dce74837 100755 --- a/egs/voxceleb/v2/path.sh +++ b/egs/voxceleb/v2/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/voxforge/s5/path.sh b/egs/voxforge/s5/path.sh index ff3c4ab6f14..927473d272b 100755 --- a/egs/voxforge/s5/path.sh +++ b/egs/voxforge/s5/path.sh @@ -17,3 +17,4 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/tools/mitlm-svn/lib # Needed for "correct" sorting export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/vystadial_cz/online_demo/path.sh b/egs/vystadial_cz/online_demo/path.sh index 3683a2b2372..0dc726ab645 100755 --- a/egs/vystadial_cz/online_demo/path.sh +++ b/egs/vystadial_cz/online_demo/path.sh @@ -39,3 +39,4 @@ max_active=14000 # Size of chunks are queued in "online" interface batch_size=4560 +export PYTHONUNBUFFERED=1 diff --git a/egs/vystadial_cz/s5/path.sh b/egs/vystadial_cz/s5/path.sh index e68d0776798..87cb788daf2 100755 --- a/egs/vystadial_cz/s5/path.sh +++ b/egs/vystadial_cz/s5/path.sh @@ -20,3 +20,4 @@ srilm_sub_bin=`find "$srilm_bin" -type d` for d in $srilm_sub_bin ; do export PATH=$d:$PATH done +export PYTHONUNBUFFERED=1 diff --git a/egs/vystadial_cz/s5b/path.sh b/egs/vystadial_cz/s5b/path.sh index 0453ff9e5aa..4ce46ad3003 100644 --- a/egs/vystadial_cz/s5b/path.sh +++ b/egs/vystadial_cz/s5b/path.sh @@ -9,4 +9,5 @@ SRILM_PATH=$SRILM_ROOT/bin:$SRILM_ROOT/bin/i686-m64 export PATH=$PATH:$SRILM_PATH export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/vystadial_en/s5/path.sh b/egs/vystadial_en/s5/path.sh index a361f52204e..6fff219299f 100755 --- a/egs/vystadial_en/s5/path.sh +++ b/egs/vystadial_en/s5/path.sh @@ -17,3 +17,4 @@ srilm_sub_bin=`find "$srilm_bin" -type d` for d in $srilm_sub_bin ; do export PATH=$d:$PATH done +export PYTHONUNBUFFERED=1 diff --git a/egs/wenetspeech/s5/path.sh b/egs/wenetspeech/s5/path.sh index 78817e9acd3..a87a0c6a383 100755 --- a/egs/wenetspeech/s5/path.sh +++ b/egs/wenetspeech/s5/path.sh @@ -12,3 +12,4 @@ SRILM_PATH=$SRILM_ROOT/bin:$SRILM_ROOT/bin/i686-m64 export PATH=$PATH:$SRILM_PATH source $KALDI_ROOT/tools/env.sh +export PYTHONUNBUFFERED=1 diff --git a/egs/wsj/s5/path.sh b/egs/wsj/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/wsj/s5/path.sh +++ b/egs/wsj/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/yesno/s5/path.sh b/egs/yesno/s5/path.sh index 21bfd1440fa..ba4c5de4315 100644 --- a/egs/yesno/s5/path.sh +++ b/egs/yesno/s5/path.sh @@ -4,5 +4,6 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/yomdle_fa/v1/path.sh b/egs/yomdle_fa/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100644 --- a/egs/yomdle_fa/v1/path.sh +++ b/egs/yomdle_fa/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/yomdle_korean/v1/path.sh b/egs/yomdle_korean/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/yomdle_korean/v1/path.sh +++ b/egs/yomdle_korean/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/yomdle_russian/v1/path.sh b/egs/yomdle_russian/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/yomdle_russian/v1/path.sh +++ b/egs/yomdle_russian/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/yomdle_tamil/v1/path.sh b/egs/yomdle_tamil/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/yomdle_tamil/v1/path.sh +++ b/egs/yomdle_tamil/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/yomdle_zh/v1/path.sh b/egs/yomdle_zh/v1/path.sh index 2d17b17a84a..ed0dc115a2c 100644 --- a/egs/yomdle_zh/v1/path.sh +++ b/egs/yomdle_zh/v1/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 diff --git a/egs/zeroth_korean/s5/path.sh b/egs/zeroth_korean/s5/path.sh index 2d17b17a84a..ed0dc115a2c 100755 --- a/egs/zeroth_korean/s5/path.sh +++ b/egs/zeroth_korean/s5/path.sh @@ -4,3 +4,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export PYTHONUNBUFFERED=1 From 85c120a42dbc6d314ecf0ca0abe8b1831d8a7965 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Fri, 12 Aug 2022 13:58:02 +0200 Subject: [PATCH 08/18] remove obsoleted and C++17 incompatible unary_function<>, resolves #4732 --- src/lm/arpa-lm-compiler.cc | 4 ++-- src/nnet3/nnet-compile-utils-test.cc | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/lm/arpa-lm-compiler.cc b/src/lm/arpa-lm-compiler.cc index 47bd20d4721..e37711c5dfe 100644 --- a/src/lm/arpa-lm-compiler.cc +++ b/src/lm/arpa-lm-compiler.cc @@ -63,7 +63,7 @@ class GeneralHistKey { return a.vector_ == b.vector_; } // Public typename HashType for hashing. 
-  struct HashType : public std::unary_function<GeneralHistKey, size_t> {
+  struct HashType {
     size_t operator()(const GeneralHistKey& key) const {
       return VectorHasher<Symbol>().operator()(key.vector_);
     }
@@ -99,7 +99,7 @@ class OptimizedHistKey {
   friend bool operator==(const OptimizedHistKey& a,
                          const OptimizedHistKey& b) {
     return a.data_ == b.data_;
   }
-  struct HashType : public std::unary_function<OptimizedHistKey, size_t> {
+  struct HashType {
     size_t operator()(const OptimizedHistKey& key) const { return key.data_; }
   };
diff --git a/src/nnet3/nnet-compile-utils-test.cc b/src/nnet3/nnet-compile-utils-test.cc
index 894d0a3577b..27e2e658bcb 100644
--- a/src/nnet3/nnet-compile-utils-test.cc
+++ b/src/nnet3/nnet-compile-utils-test.cc
@@ -23,7 +23,7 @@
 namespace kaldi {
 namespace nnet3 {
 
-struct ComparePair : public std::unary_function<std::pair<int32, int32>, bool>
+struct ComparePair
 {
   explicit ComparePair(const std::pair<int32, int32> &correct_pair):
     correct_pair_(correct_pair) {}
@@ -33,8 +33,7 @@ struct ComparePair : public std::unary_function<std::pair<int32, int32>, bool>
   std::pair<int32, int32> correct_pair_;
 };
 
-struct PairIsEqualComparator :
-    public std::unary_function<std::pair<int32, int32>, bool>
+struct PairIsEqualComparator
 {
   explicit PairIsEqualComparator(const std::pair<int32, int32> pair):
     pair_(pair) {}

From 09a63063c7784dcf14c12ee60e4cd872852f9506 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Fri, 12 Aug 2022 15:35:59 +0200
Subject: [PATCH 09/18] remove the c++17 removed function random_shuffle

---
 src/chainbin/nnet3-chain-shuffle-egs.cc      |  6 ++++--
 src/chainbin/nnet3-chain-subset-egs.cc       |  5 +++--
 src/lat/word-align-lattice-lexicon-test.cc   |  7 ++++++-
 src/nnet/nnet-randomizer-test.cc             |  6 +++++-
 src/nnet/nnet-randomizer.cc                  |  6 +++++-
 src/nnet2/nnet-component.cc                  |  5 ++++-
 .../nnet-shuffle-egs-discriminative.cc       | 19 +++++++++--------
 src/nnet2bin/nnet-shuffle-egs.cc             |  6 ++++--
 src/nnet2bin/nnet-subset-egs.cc              | 21 ++++++++++---------
 src/nnet3/convolution-test.cc                |  7 ++++++-
 src/nnet3/nnet-example-utils.cc              |  9 +++++---
 src/nnet3/nnet-test-utils.cc                 |  7 ++++++-
 .../nnet3-discriminative-shuffle-egs.cc      |  6 +++---
 .../nnet3-discriminative-subset-egs.cc       | 19 +++++++++--------
 src/nnet3bin/nnet3-shuffle-egs.cc            |  6 ++++--
 src/nnet3bin/nnet3-subset-egs.cc             | 19 +++++++++--------
 src/rnnlm/sampler-test.cc                    | 10 ++++++---
 src/rnnlm/sampler.cc                         | 11 ++++++----
 src/util/kaldi-table-test.cc                 |  7 ++++++-
 19 files changed, 117 insertions(+), 65 deletions(-)

diff --git a/src/chainbin/nnet3-chain-shuffle-egs.cc b/src/chainbin/nnet3-chain-shuffle-egs.cc
index 7ab6e28f607..1198991d6e1 100644
--- a/src/chainbin/nnet3-chain-shuffle-egs.cc
+++ b/src/chainbin/nnet3-chain-shuffle-egs.cc
@@ -22,6 +22,7 @@
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet3/nnet-chain-example.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -73,8 +74,9 @@ int main(int argc, char *argv[]) {
         egs.push_back(std::pair<std::string, NnetChainExample*>(
             example_reader.Key(),
             new NnetChainExample(example_reader.Value())));
-
-      std::random_shuffle(egs.begin(), egs.end());
+      std::random_device rd;
+      std::mt19937 g(rd());
+      std::shuffle(egs.begin(), egs.end(), g);
     } else {
       KALDI_ASSERT(buffer_size > 0);
       egs.resize(buffer_size,
diff --git a/src/chainbin/nnet3-chain-subset-egs.cc b/src/chainbin/nnet3-chain-subset-egs.cc
index 0206003ab13..d8a76b6bf85 100644
--- a/src/chainbin/nnet3-chain-subset-egs.cc
+++ b/src/chainbin/nnet3-chain-subset-egs.cc
@@ -21,6 +21,7 @@
 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "nnet3/nnet-chain-example.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -49,7 +50,7 @@
     po.Read(argc, argv);
 
-    srand(srand_seed);
+    std::mt19937 g(srand_seed);
 
     if (po.NumArgs() != 2) {
       po.PrintUsage();
@@ -81,7 +82,7 @@
       }
     }
     if (randomize_order)
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
 
     NnetChainExampleWriter writer(examples_wspecifier);
     for (size_t i = 0; i < egs.size(); i++) {
diff --git a/src/lat/word-align-lattice-lexicon-test.cc b/src/lat/word-align-lattice-lexicon-test.cc
index d8b1a4a8426..cfd5574b656 100644
--- a/src/lat/word-align-lattice-lexicon-test.cc
+++ b/src/lat/word-align-lattice-lexicon-test.cc
@@ -26,6 +26,8 @@
 #include "hmm/hmm-test-utils.h"
 #include "lat/word-align-lattice-lexicon.h"
 
+#include <random>
+
 namespace kaldi {
 
 // This function generates a lexicon in the same format that
@@ -56,8 +58,11 @@ void GenerateLexicon(const std::vector<int32> &phones,
     }
   }
   SortAndUniq(lexicon);
+
   // randomize the order.
-  std::random_shuffle(lexicon->begin(), lexicon->end());
+  std::random_device rd;
+  std::mt19937 g(rd());
+  std::shuffle(lexicon->begin(), lexicon->end(), g);
 
   for (size_t i = 0; i < lexicon->size(); i++) {
diff --git a/src/nnet/nnet-randomizer-test.cc b/src/nnet/nnet-randomizer-test.cc
index 1f4b2564089..f972a7662a4 100644
--- a/src/nnet/nnet-randomizer-test.cc
+++ b/src/nnet/nnet-randomizer-test.cc
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <random>
 
 using namespace kaldi;
 using namespace kaldi::nnet1;
@@ -181,7 +182,10 @@ void UnitTestStdVectorRandomizer() {
   for (int32 i = 0; i < v.size(); i++) {
     v.at(i) = i;
   }
-  std::random_shuffle(v.begin(), v.end());
+  std::random_device rd;
+  std::mt19937 g(rd());
+
+  std::shuffle(v.begin(), v.end(), g);
 
   // config
   NnetDataRandomizerOptions c;
diff --git a/src/nnet/nnet-randomizer.cc b/src/nnet/nnet-randomizer.cc
index b15214ea477..d38e3f3381e 100644
--- a/src/nnet/nnet-randomizer.cc
+++ b/src/nnet/nnet-randomizer.cc
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <random>
 
 namespace kaldi {
 namespace nnet1 {
@@ -37,7 +38,10 @@ const std::vector<int32>& RandomizerMask::Generate(int32 mask_size) {
   mask_.resize(mask_size);
   for (int32 i = 0; i < mask_size; i++) mask_[i] = i;
   // shuffle using built-in random generator:
-  std::random_shuffle(mask_.begin(), mask_.end());
+  std::random_device rd;
+  std::mt19937 g(rd());
+
+  std::shuffle(mask_.begin(), mask_.end(), g);
   return mask_;
 }
diff --git a/src/nnet2/nnet-component.cc b/src/nnet2/nnet-component.cc
index f0919acfac8..cf0186882de 100644
--- a/src/nnet2/nnet-component.cc
+++ b/src/nnet2/nnet-component.cc
@@ -23,6 +23,7 @@
 #include
 #include
+#include <random>
 #include "nnet2/nnet-component.h"
 #include "nnet2/nnet-precondition.h"
 #include "nnet2/nnet-precondition-online.h"
@@ -2317,7 +2318,9 @@ void PermuteComponent::Init(int32 dim) {
   KALDI_ASSERT(dim > 0);
   reorder_.resize(dim);
   for (int32 i = 0; i < dim; i++) reorder_[i] = i;
-  std::random_shuffle(reorder_.begin(), reorder_.end());
+  std::random_device rd;
+  std::mt19937 g(rd());
+  std::shuffle(reorder_.begin(), reorder_.end(), g);
 }
 
 void PermuteComponent::InitFromString(std::string args) {
diff --git a/src/nnet2bin/nnet-shuffle-egs-discriminative.cc b/src/nnet2bin/nnet-shuffle-egs-discriminative.cc
index ebef378d16b..5ef3acf6599 100644
--- a/src/nnet2bin/nnet-shuffle-egs-discriminative.cc
+++ b/src/nnet2bin/nnet-shuffle-egs-discriminative.cc
@@ -21,6 +21,7 @@
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet2/nnet-example-functions.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -37,7 +38,7 @@ int main(int argc, char *argv[]) {
         "Usage: nnet-shuffle-egs-discriminative [options] <degs-rspecifier> <degs-wspecifier>\n"
        "\n"
        "nnet-shuffle-egs-discriminative --srand=1 ark:train.degs ark:shuffled.degs\n";
-    
+
     int32 srand_seed = 0;
     int32 buffer_size = 0;
     ParseOptions po(usage);
@@ -45,11 +46,11 @@
     po.Register("buffer-size", &buffer_size, "If >0, size of a buffer we use "
                 "to do limited-memory partial randomization. Otherwise, do "
                 "full randomization.");
-    
+
     po.Read(argc, argv);
 
-    srand(srand_seed);
-    
+    std::mt19937 g(srand_seed);
+
     if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
@@ -68,12 +69,12 @@
     if (buffer_size == 0) { // Do full randomization
       // Putting in an extra level of indirection here to avoid excessive
       // computation and memory demands when we have to resize the vector.
-      
+
       for (; !example_reader.Done(); example_reader.Next())
         egs.push_back(new DiscriminativeNnetExample(
            example_reader.Value()));
-      
-      std::random_shuffle(egs.begin(), egs.end());
+
+      std::shuffle(egs.begin(), egs.end(), g);
     } else {
       KALDI_ASSERT(buffer_size > 0);
       egs.resize(buffer_size, NULL);
@@ -88,7 +89,7 @@
           *(egs[index]) = example_reader.Value();
           num_done++;
         }
-      } 
+      }
     }
     for (size_t i = 0; i < egs.size(); i++) {
       std::ostringstream ostr;
@@ -103,7 +104,7 @@
     KALDI_LOG << "Shuffled order of " << num_done
               << " neural-network training examples "
               << (buffer_size ? "using a buffer (partial randomization)" : "");
-    
+
     return (num_done == 0 ? 1 : 0);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
diff --git a/src/nnet2bin/nnet-shuffle-egs.cc b/src/nnet2bin/nnet-shuffle-egs.cc
index 7c4872b48b6..55d02f26015 100644
--- a/src/nnet2bin/nnet-shuffle-egs.cc
+++ b/src/nnet2bin/nnet-shuffle-egs.cc
@@ -22,6 +22,7 @@
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet2/nnet-example-functions.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -49,7 +50,8 @@ int main(int argc, char *argv[]) {
     po.Read(argc, argv);
 
-    srand(srand_seed);
+    std::mt19937 g(srand_seed);
+
 
     if (po.NumArgs() != 2) {
       po.PrintUsage();
@@ -73,7 +75,7 @@ int main(int argc, char *argv[]) {
       egs.push_back(std::make_pair(example_reader.Key(),
                                    new NnetExample(example_reader.Value())));
 
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
     } else {
       KALDI_ASSERT(buffer_size > 0);
       egs.resize(buffer_size,
diff --git a/src/nnet2bin/nnet-subset-egs.cc b/src/nnet2bin/nnet-subset-egs.cc
index 4511870eab1..2ff08a06692 100644
--- a/src/nnet2bin/nnet-subset-egs.cc
+++ b/src/nnet2bin/nnet-subset-egs.cc
@@ -22,6 +22,7 @@
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet2/nnet-example-functions.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -38,7 +39,7 @@
        "\n"
        "e.g.\n"
        "nnet-subset-egs [args] ark:- | nnet-subset-egs --n=1000 ark:- ark:subset.egs\n";
-    
+
     int32 srand_seed = 0;
     int32 n = 1000;
     bool randomize_order = true;
@@ -47,11 +48,11 @@
     po.Register("n", &n, "Number of examples to output");
     po.Register("randomize-order", &randomize_order, "If true, randomize the order "
                 "of the output");
-    
+
     po.Read(argc, argv);
-    
-    srand(srand_seed);
-    
+
+    std::mt19937 g(srand_seed);
+
     if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
@@ -61,8 +62,8 @@
         examples_wspecifier = po.GetArg(2);
 
     std::vector<std::pair<std::string, NnetExample> > egs;
-    egs.reserve(n); 
-    
+    egs.reserve(n);
+
     SequentialNnetExampleReader example_reader(examples_rspecifier);
 
     int64 num_read = 0;
@@ -82,16 +83,16 @@
       }
     }
     if (randomize_order)
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
 
     NnetExampleWriter writer(examples_wspecifier);
     for (size_t i = 0; i < egs.size(); i++) {
       writer.Write(egs[i].first, egs[i].second);
     }
-    
+
     KALDI_LOG << "Selected a subset of " << egs.size() << " out of "
               << num_read << " neural-network training examples ";
-    
+
     return (num_read != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
diff --git a/src/nnet3/convolution-test.cc b/src/nnet3/convolution-test.cc
index cd52ae8682a..787dd889f01 100644
--- a/src/nnet3/convolution-test.cc
+++ b/src/nnet3/convolution-test.cc
@@ -19,6 +19,7 @@
 
 #include "nnet3/convolution.h"
 #include "util/common-utils.h"
+#include <random>
 
 namespace kaldi {
 namespace nnet3 {
@@ -26,6 +27,9 @@ namespace time_height_convolution {
 
 // for testing purposes, create a random ConvolutionModel.
 static void GetRandomConvolutionModel(ConvolutionModel *model) {
+  std::random_device rd;
+  std::mt19937 g(rd());
+
 start:
   {
     model->num_filters_in = RandInt(1, 10);
@@ -60,7 +64,8 @@ static void GetRandomConvolutionModel(ConvolutionModel *model) {
   }
   SortAndUniq(&(model->offsets));
   SortAndUniq(&all_time_offsets);
-  std::random_shuffle(all_time_offsets.begin(), all_time_offsets.end());
+
+  std::shuffle(all_time_offsets.begin(), all_time_offsets.end(), g);
   int32 num_required_offsets = RandInt(1, all_time_offsets.size());
   for (int32 i = 0; i < num_required_offsets; i++)
     model->required_time_offsets.insert(all_time_offsets[i]);
diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc
index 6b917483bc2..facbbb19be0 100644
--- a/src/nnet3/nnet-example-utils.cc
+++ b/src/nnet3/nnet-example-utils.cc
@@ -22,8 +22,9 @@
 #include "lat/lattice-functions.h"
 #include "hmm/posterior.h"
 #include "util/text-utils.h"
-#include
 #include
+#include
+#include <random>
 
 namespace kaldi {
 namespace nnet3 {
@@ -572,7 +573,7 @@ bool UtteranceSplitter::LengthsMatch(const std::string &utt,
                                      int32 length_tolerance) const {
   int32 sf = config_.frame_subsampling_factor,
       expected_supervision_length = (utterance_length + sf - 1) / sf;
-  if (std::abs(supervision_length - expected_supervision_length) 
+  if (std::abs(supervision_length - expected_supervision_length)
       <= length_tolerance) {
     return true;
   } else {
@@ -710,7 +711,9 @@ void UtteranceSplitter::DistributeRandomlyUniform(int32 n, std::vector<int32> *v
   for (; i < size; i++) {
     (*vec)[i] = common_part;
   }
-  std::random_shuffle(vec->begin(), vec->end());
+  std::random_device rd;
+  std::mt19937 g(rd());
+  std::shuffle(vec->begin(), vec->end(), g);
   KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n);
 }
diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc
index a8ef30bc314..913df08509b 100644
--- a/src/nnet3/nnet-test-utils.cc
+++ b/src/nnet3/nnet-test-utils.cc
@@ -21,6 +21,7 @@
 #include
 #include
+#include <random>
 #include "nnet3/nnet-test-utils.h"
 #include "nnet3/nnet-utils.h"
@@ -1402,6 +1403,7 @@ static void GenerateRandomComponentConfig(std::string *component_type,
   int32 n = RandInt(0, 37);
   BaseFloat learning_rate = 0.001 * RandInt(1, 100);
+  std::random_device rd;
 
   std::ostringstream os;
   switch(n) {
@@ -1561,7 +1563,10 @@ static void GenerateRandomComponentConfig(std::string *component_type,
       std::vector<int32> column_map(input_dim);
       for (int32 i = 0; i < input_dim; i++)
         column_map[i] = i;
-      std::random_shuffle(column_map.begin(), column_map.end());
+
+      std::mt19937 g(rd());
+      std::shuffle(column_map.begin(), column_map.end(), g);
+
       std::ostringstream buffer;
       for (int32 i = 0; i < input_dim-1; i++)
         buffer << column_map[i] << ",";
diff --git a/src/nnet3bin/nnet3-discriminative-shuffle-egs.cc b/src/nnet3bin/nnet3-discriminative-shuffle-egs.cc
index 2a029123852..faa046aaae2 100644
--- a/src/nnet3bin/nnet3-discriminative-shuffle-egs.cc
+++ b/src/nnet3bin/nnet3-discriminative-shuffle-egs.cc
@@ -22,6 +22,7 @@
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet3/nnet-discriminative-example.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -49,7 +50,7 @@ int main(int argc, char *argv[]) {
     po.Read(argc, argv);
 
-    srand(srand_seed);
+    std::mt19937 g(srand_seed);
 
     if (po.NumArgs() != 2) {
       po.PrintUsage();
@@ -74,7 +75,7 @@ int main(int argc, char *argv[]) {
             example_reader.Key(),
             new NnetDiscriminativeExample(example_reader.Value())));
 
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
     } else {
       KALDI_ASSERT(buffer_size > 0);
       egs.resize(buffer_size,
@@ -113,4 +114,3 @@ int main(int argc, char *argv[]) {
   }
 }
 
-
diff --git a/src/nnet3bin/nnet3-discriminative-subset-egs.cc b/src/nnet3bin/nnet3-discriminative-subset-egs.cc
index 748665adbcf..f1a63c4e946 100644
--- a/src/nnet3bin/nnet3-discriminative-subset-egs.cc
+++ b/src/nnet3bin/nnet3-discriminative-subset-egs.cc
@@ -21,6 +21,7 @@
 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "nnet3/nnet-discriminative-example.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -37,7 +38,7 @@
        "\n"
        "e.g.\n"
        "nnet3-discriminative-copy-egs [args] ark:degs.1.ark ark:- | nnet-discriminative-subset-egs --n=1000 ark:- ark:subset.egs\n";
-    
+
     int32 srand_seed = 0;
     int32 n = 1000;
     bool randomize_order = true;
@@ -46,11 +47,11 @@
     po.Register("n", &n, "Number of examples to output");
     po.Register("randomize-order", &randomize_order, "If true, randomize the order "
                 "of the output");
-    
+
     po.Read(argc, argv);
-    
-    srand(srand_seed);
-    
+
+    std::mt19937 g(srand_seed);
+
     if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
@@ -61,7 +62,7 @@
     std::vector<std::pair<std::string, NnetDiscriminativeExample> > egs;
     egs.reserve(n);
-    
+
     SequentialNnetDiscriminativeExampleReader example_reader(examples_rspecifier);
 
     int64 num_read = 0;
@@ -81,16 +82,16 @@
       }
     }
     if (randomize_order)
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
 
     NnetDiscriminativeExampleWriter writer(examples_wspecifier);
     for (size_t i = 0; i < egs.size(); i++) {
       writer.Write(egs[i].first, egs[i].second);
     }
-    
+
     KALDI_LOG << "Selected a subset of " << egs.size() << " out of "
               << num_read << " neural-network discriminative training examples ";
-    
+
     return (num_read != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
diff --git a/src/nnet3bin/nnet3-shuffle-egs.cc b/src/nnet3bin/nnet3-shuffle-egs.cc
index 1cf08085975..f456c5f8552 100644
--- a/src/nnet3bin/nnet3-shuffle-egs.cc
+++ b/src/nnet3bin/nnet3-shuffle-egs.cc
@@ -22,6 +22,7 @@
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet3/nnet-example.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -50,7 +51,8 @@
     po.Read(argc, argv);
 
-    srand(srand_seed);
+    std::mt19937 g(srand_seed);
+
 
     if (po.NumArgs() != 2) {
       po.PrintUsage();
@@ -74,7 +76,7 @@ int main(int argc, char *argv[]) {
       egs.push_back(std::make_pair(example_reader.Key(),
                                    new NnetExample(example_reader.Value())));
 
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
     } else {
       KALDI_ASSERT(buffer_size > 0);
       egs.resize(buffer_size,
diff --git a/src/nnet3bin/nnet3-subset-egs.cc b/src/nnet3bin/nnet3-subset-egs.cc
index 2a52b80ac96..2ef23d2564f 100644
--- a/src/nnet3bin/nnet3-subset-egs.cc
+++ b/src/nnet3bin/nnet3-subset-egs.cc
@@ -21,6 +21,7 @@
 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "nnet3/nnet-example.h"
+#include <random>
 
 int main(int argc, char *argv[]) {
   try {
@@ -37,7 +38,7 @@
        "\n"
        "e.g.\n"
        "nnet3-copy-egs [args] ark:egs.1.ark ark:- | nnet-subset-egs --n=1000 ark:- ark:subset.egs\n";
-    
+
     int32 srand_seed = 0;
     int32 n = 1000;
     bool randomize_order = true;
@@ -46,11 +47,11 @@
     po.Register("n", &n, "Number of examples to output");
     po.Register("randomize-order", &randomize_order, "If true, randomize the order "
                 "of the output");
-    
+
     po.Read(argc, argv);
-    
-    srand(srand_seed);
-    
+
+    std::mt19937 g(srand_seed);
+
     if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
@@ -61,7 +62,7 @@
     std::vector<std::pair<std::string, NnetExample> > egs;
     egs.reserve(n);
-    
+
     SequentialNnetExampleReader example_reader(examples_rspecifier);
 
     int64 num_read = 0;
@@ -81,16 +82,16 @@
       }
     }
     if (randomize_order)
-      std::random_shuffle(egs.begin(), egs.end());
+      std::shuffle(egs.begin(), egs.end(), g);
 
     NnetExampleWriter writer(examples_wspecifier);
     for (size_t i = 0; i < egs.size(); i++) {
       writer.Write(egs[i].first, egs[i].second);
     }
-    
+
     KALDI_LOG << "Selected a subset of " << egs.size() << " out of "
               << num_read << " neural-network training examples ";
-    
+
     return (num_read != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
diff --git a/src/rnnlm/sampler-test.cc b/src/rnnlm/sampler-test.cc
index ecebaf30694..d62510773fc 100644
--- a/src/rnnlm/sampler-test.cc
+++ b/src/rnnlm/sampler-test.cc
@@ -17,10 +17,11 @@
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.
 
 #include "base/kaldi-math.h"
-#include
-#include
 #include "rnnlm/sampler.h"
 #include "util/stl-utils.h"
+#include
+#include
+#include <random>
 
 namespace kaldi {
 namespace rnnlm {
@@ -56,6 +57,9 @@ bool NormalizedSquaredDiffLessThanThreshold(
 
 void UnitTestSampleWithoutReplacement() {
   int32 num_tries = 50;
+  std::random_device rd;
+  std::mt19937 g(rd());
+
   for (int32 t = 0; t < num_tries; t++) {
     std::vector<double> prob;
     int32 num_elements = RandInt(1, 100);
@@ -71,7 +75,7 @@
     }
     int32 total_ceil = std::ceil(total);
     prob[num_elements - 1] = total_ceil - total;
-    std::random_shuffle(prob.begin(), prob.end());
+    std::shuffle(prob.begin(), prob.end(), g);
 
     std::vector<double> sample_total(prob.size());
     size_t l = 0;
diff --git a/src/rnnlm/sampler.cc b/src/rnnlm/sampler.cc
index 3fe11ea4196..30d9bc1c543 100644
--- a/src/rnnlm/sampler.cc
+++ b/src/rnnlm/sampler.cc
@@ -17,12 +17,13 @@
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.
 
+#include "base/kaldi-math.h"
+#include "rnnlm/sampler.h"
+#include "util/stl-utils.h"
 #include
 #include
 #include
-#include "rnnlm/sampler.h"
-#include "base/kaldi-math.h"
-#include "util/stl-utils.h"
+#include <random>
 
 namespace kaldi {
 namespace rnnlm {
@@ -30,6 +31,8 @@ namespace rnnlm {
 
 void SampleWithoutReplacement(const std::vector<double> &probs,
                               std::vector<int32> *sample) {
+  std::random_device rd;
+  std::mt19937 g(rd());
 
   // This outer loop over 't' will *almost always* just run for t == 0.  The
   // loop is necessary only to handle a pathological case.
@@ -52,7 +55,7 @@
   // matter for most applications.
   std::vector<int32> order(n);
   for (int32 i = 0; i < n; i++) order[i] = i;
-  std::random_shuffle(order.begin(), order.end());
+  std::shuffle(order.begin(), order.end(), g);
 #endif
 
   double r = RandUniform();  // r <= 0 <= 1.
diff --git a/src/util/kaldi-table-test.cc b/src/util/kaldi-table-test.cc
index d3deaff0aaa..358e33e686a 100644
--- a/src/util/kaldi-table-test.cc
+++ b/src/util/kaldi-table-test.cc
@@ -24,6 +24,8 @@
 #include "util/kaldi-holder.h"
 #include "util/table-types.h"
 
+#include <random>
+
 namespace kaldi {
 
 void UnitTestReadScriptFile() {
@@ -841,6 +843,9 @@ void UnitTestTableRandomBothDouble(bool binary, bool read_scp,
 
 void UnitTestRangesMatrix(bool binary) {
+  std::random_device rd;
+  std::mt19937 g(rd());
+
   int32 archive_size = RandInt(1, 10);
   std::vector<std::pair<std::string, Matrix<BaseFloat> > > archive_contents(
       archive_size);
@@ -854,7 +859,7 @@ void UnitTestRangesMatrix(bool binary) {
     archive_contents[i].second.SetRandn();
   }
   if (RandInt(0, 1) == 0)
-    std::random_shuffle(archive_contents.begin(), archive_contents.end());
+    std::shuffle(archive_contents.begin(), archive_contents.end(), g);
 
   std::ostringstream writer_name;
   writer_name << "ark,scp";

From 3dd259bcd8eb7e207a000b1cbbf5778f522878d2 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Thu, 18 Aug 2022 03:42:36 -0400
Subject: [PATCH 10/18] [infra] build docker images automatically using gh (#4782)

* [infra] build docker images automatically using gh

* minor change
---
 .github/workflows/docker-images.yml | 70 +++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 .github/workflows/docker-images.yml

diff --git a/.github/workflows/docker-images.yml b/.github/workflows/docker-images.yml
new file mode 100644
index 00000000000..3abf6ca72fb
--- /dev/null
+++ b/.github/workflows/docker-images.yml
@@ -0,0 +1,70 @@
+name: Docker Image CI
+
+on:
+  schedule:
+    - cron: '37 2 * * 1'
+  workflow_dispatch:
+    inputs:
+      logLevel:
+        description: 'Log level'
+        required: true
+        default: 'warning'
+        type: choice
+        options:
+          - info
+          - warning
+          - debug
+# pull_request:  #for debugging purposes
+#   branches: [ "master" ]
+
+jobs:
+  enable_build:
+    if: github.repository == 'kaldi-asr/kaldi'
+    runs-on: ubuntu-latest
+    outputs:
+      enabled: ${{ steps.set-enabled.outputs.enabled }}
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Set enabled
+        id: set-enabled
+        run: |
+          set -x
+          echo $(git rev-list --after="1 week" ${{ github.sha }})
+          if test -z $(git rev-list --after="1 week" ${{ github.sha }} | tail -n 1) ; then
+            enabled=false
+          else
+            enabled=true
+          fi
+          echo "enabled: $enabled"
+          echo "::set-output name=enabled::${enabled}"
+
+
+  docker-buildx:
+    needs: enable_build
+    if: needs.enable_build.outputs.enabled == 'true' || github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          install: true
+      - name: Login to DockerHub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Inspect builder
+        run: |
+          echo "Name: ${{ steps.buildx.outputs.name }}"
+          echo "Endpoint: ${{ steps.buildx.outputs.endpoint }}"
+          echo "Status: ${{ steps.buildx.outputs.status }}"
+          echo "Flags: ${{ steps.buildx.outputs.flags }}"
+          echo "Platforms: ${{ steps.buildx.outputs.platforms }}"
+      - name: Build and push
+        run: |
+          cd docker/ubuntu18.04-cuda10.0/
+          docker build --push --tag kaldiasr/kaldi:gpu-latest --tag kaldiasr/kaldi:gpu-ubuntu18.04-cuda10.0 --tag kaldiasr/kaldi:gpu-ubuntu18.04-cuda10.0-$(date +%s) .
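The `enable_build` job above is a freshness gate: the weekly cron rebuild is skipped unless the repository saw at least one commit in the past week. A minimal local sketch of the same check, assuming it runs inside a checkout and using `HEAD` in place of `${{ github.sha }}`:

    #!/usr/bin/env bash
    # List commits newer than one week; an empty listing means nothing
    # changed since the last weekly build, so the rebuild can be skipped.
    if [ -z "$(git rev-list --after='1 week' HEAD | tail -n 1)" ]; then
      enabled=false
    else
      enabled=true
    fi
    echo "enabled: $enabled"

The workflow then publishes the flag via `::set-output`, a mechanism GitHub has since deprecated in favor of appending to `$GITHUB_OUTPUT`.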
From 4592c54774e7101ccf4edc4a8619408e1b1030d4 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Thu, 18 Aug 2022 05:00:59 -0400
Subject: [PATCH 11/18] [infra] add cpu-only docker build (#4783)

---
 .github/workflows/docker-images.yml | 37 +++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docker-images.yml b/.github/workflows/docker-images.yml
index 3abf6ca72fb..d0bb01c5bf6 100644
--- a/.github/workflows/docker-images.yml
+++ b/.github/workflows/docker-images.yml
@@ -41,7 +41,7 @@ jobs:
           echo "::set-output name=enabled::${enabled}"
 
 
-  docker-buildx:
+  docker-buildx-gpu:
     needs: enable_build
     if: needs.enable_build.outputs.enabled == 'true' || github.event_name == 'push' || github.event_name == 'workflow_dispatch'
     runs-on: ubuntu-latest
@@ -67,4 +67,37 @@ jobs:
       - name: Build and push
         run: |
           cd docker/ubuntu18.04-cuda10.0/
-          docker build --push --tag kaldiasr/kaldi:gpu-latest --tag kaldiasr/kaldi:gpu-ubuntu18.04-cuda10.0 --tag kaldiasr/kaldi:gpu-ubuntu18.04-cuda10.0-$(date +%s) .
+          docker build --push --tag kaldiasr/kaldi:gpu-latest --tag kaldiasr/kaldi:gpu-ubuntu18.04-cuda10.0 --tag kaldiasr/kaldi:gpu-ubuntu18.04-cuda10.0-$(date +%F) .
+
+  docker-buildx-cpu:
+    needs: enable_build
+    if: needs.enable_build.outputs.enabled == 'true' || github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          install: true
+      - name: Login to DockerHub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Inspect builder
+        run: |
+          echo "Name: ${{ steps.buildx.outputs.name }}"
+          echo "Endpoint: ${{ steps.buildx.outputs.endpoint }}"
+          echo "Status: ${{ steps.buildx.outputs.status }}"
+          echo "Flags: ${{ steps.buildx.outputs.flags }}"
+          echo "Platforms: ${{ steps.buildx.outputs.platforms }}"
+      - name: Build and push
+        run: |
+          cd docker/debian10-cpu/
+          docker build --push \
+            --tag kaldiasr/kaldi:latest \
+            --tag kaldiasr/kaldi:cpu-latest \
+            --tag kaldiasr/kaldi:cpu-debian10 \
+            --tag kaldiasr/kaldi:cpu-debian10-$(date +%F) .
+

From 0fb502d10047ab702c664db4d7e175d282762539 Mon Sep 17 00:00:00 2001
From: Agrover112 <42321810+Agrover112@users.noreply.github.com>
Date: Sun, 28 Aug 2022 15:45:47 +0530
Subject: [PATCH 12/18] Fix: ali-to-post piping in post-to-tacc example (#4788)

The example for post-to-tacc fails, but with the correct `ark:- |`
output specifier there is no piping error.
---
 src/bin/post-to-tacc.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bin/post-to-tacc.cc b/src/bin/post-to-tacc.cc
index afa5315d6b4..d50a2a60029 100644
--- a/src/bin/post-to-tacc.cc
+++ b/src/bin/post-to-tacc.cc
@@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
         "Note: the model is only read in order to get the size of the vector\n"
         "\n"
         "Usage: post-to-tacc [options] <model> <post-rspecifier> <accs>\n"
-        " e.g.: post-to-tacc --binary=false 1.mdl \"ark:ali-to-post 1.ali|\" 1.tacc\n"
+        " e.g.: post-to-tacc --binary=false 1.mdl \"ark:ali-to-post 1.ali ark:-|\" 1.tacc\n"
         "See also: get-post-on-ali\n";
 
     bool binary = true;

From a754c189f406d0dd705712d8022275ee3500c8f7 Mon Sep 17 00:00:00 2001
From: Jonghwan Hyeon
Date: Thu, 1 Sep 2022 06:58:41 +0900
Subject: [PATCH 13/18] [scripts] Copy utt2lang in copy_data_dir.sh (#4789)

Co-authored-by: Jonghwan Hyeon
---
 egs/wsj/s5/utils/copy_data_dir.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/egs/wsj/s5/utils/copy_data_dir.sh b/egs/wsj/s5/utils/copy_data_dir.sh
index 9fd420c42a5..3374baf9bdb 100755
--- a/egs/wsj/s5/utils/copy_data_dir.sh
+++ b/egs/wsj/s5/utils/copy_data_dir.sh
@@ -84,6 +84,10 @@ if [ -f $srcdir/vad.scp ]; then
   utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp
 fi
 
+if [ -f $srcdir/utt2lang ]; then
+  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2lang >$destdir/utt2lang
+fi
+
 if [ -f $srcdir/segments ]; then
   utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
   cp $srcdir/wav.scp $destdir

From 7bc53ef85f1076b167c9e15216dc97081899cd07 Mon Sep 17 00:00:00 2001
From: Taku Takamatsu <59686013+taku-takamatsu@users.noreply.github.com>
Date: Wed, 31 Aug 2022 19:04:48 -0700
Subject: [PATCH 14/18] [scripts] Move otherwise misleading comment in mkgraph.sh (#4787)

---
 egs/wsj/s5/utils/mkgraph.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/egs/wsj/s5/utils/mkgraph.sh b/egs/wsj/s5/utils/mkgraph.sh
index c2bf206b571..dcca77913b1 100755
--- a/egs/wsj/s5/utils/mkgraph.sh
+++ b/egs/wsj/s5/utils/mkgraph.sh
@@ -50,7 +50,6 @@ dir=$3
 
 mkdir -p $dir
 
-# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
 # (note: the [[ ]] brackets make the || type operators work (inside [ ], we
 # would have to use -o instead), -f means file exists, and -ot means older than).
 
@@ -93,6 +92,8 @@ fi
 
 mkdir -p $lang/tmp
 trap "rm -f $lang/tmp/LG.fst.$$" EXIT HUP INT PIPE TERM
+
+# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
 # Note: [[ ]] is like [ ] but enables certain extra constructs, e.g. || in
 # place of -o
 if [[ !
-s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \

From 727e454840df95fa792624251b70afa939186aa3 Mon Sep 17 00:00:00 2001
From: bhuang
Date: Wed, 14 Sep 2022 06:17:39 +0200
Subject: [PATCH 15/18] [egs] Kill feral dupes of --num-threads in a few local eg scripts (#4792)

---
 egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh | 3 +--
 egs/tedlium/s5_r2/local/run_cleanup_segmentation.sh  | 3 +--
 egs/tedlium/s5_r3/local/run_cleanup_segmentation.sh  | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh b/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
index ac5f0cb9009..e3ffffb1e99 100755
--- a/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
+++ b/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
@@ -59,8 +59,7 @@ if [ $stage -le 4 ]; then
   utils/mkgraph.sh data/lang ${cleaned_dir} ${cleaned_dir}/graph
 
   for dset in dev test; do
-    steps/decode_fmllr.sh --nj $decode_nj --num-threads $decode_num_threads \
-      --cmd "$decode_cmd" --num-threads 4 \
+    steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads $decode_num_threads \
       ${cleaned_dir}/graph data/${dset} ${cleaned_dir}/decode_${dset}
     steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
       data/${dset} ${cleaned_dir}/decode_${dset} ${cleaned_dir}/decode_${dset}_rescore
diff --git a/egs/tedlium/s5_r2/local/run_cleanup_segmentation.sh b/egs/tedlium/s5_r2/local/run_cleanup_segmentation.sh
index ac5f0cb9009..e3ffffb1e99 100755
--- a/egs/tedlium/s5_r2/local/run_cleanup_segmentation.sh
+++ b/egs/tedlium/s5_r2/local/run_cleanup_segmentation.sh
@@ -59,8 +59,7 @@ if [ $stage -le 4 ]; then
   utils/mkgraph.sh data/lang ${cleaned_dir} ${cleaned_dir}/graph
 
   for dset in dev test; do
-    steps/decode_fmllr.sh --nj $decode_nj --num-threads $decode_num_threads \
-      --cmd "$decode_cmd" --num-threads 4 \
+    steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads $decode_num_threads \
       ${cleaned_dir}/graph data/${dset} ${cleaned_dir}/decode_${dset}
     steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
       data/${dset} ${cleaned_dir}/decode_${dset} ${cleaned_dir}/decode_${dset}_rescore
diff --git a/egs/tedlium/s5_r3/local/run_cleanup_segmentation.sh b/egs/tedlium/s5_r3/local/run_cleanup_segmentation.sh
index ac5f0cb9009..e3ffffb1e99 100755
--- a/egs/tedlium/s5_r3/local/run_cleanup_segmentation.sh
+++ b/egs/tedlium/s5_r3/local/run_cleanup_segmentation.sh
@@ -59,8 +59,7 @@ if [ $stage -le 4 ]; then
   utils/mkgraph.sh data/lang ${cleaned_dir} ${cleaned_dir}/graph
 
   for dset in dev test; do
-    steps/decode_fmllr.sh --nj $decode_nj --num-threads $decode_num_threads \
-      --cmd "$decode_cmd" --num-threads 4 \
+    steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads $decode_num_threads \
       ${cleaned_dir}/graph data/${dset} ${cleaned_dir}/decode_${dset}
     steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
      data/${dset} ${cleaned_dir}/decode_${dset} ${cleaned_dir}/decode_${dset}_rescore

From 299dadcf78ed56f504164d103f968a01eec1413a Mon Sep 17 00:00:00 2001
From: npovey <24726380+npovey@users.noreply.github.com>
Date: Mon, 19 Sep 2022 23:03:46 -0700
Subject: [PATCH 16/18] change [utter] to [utterance.]
 in data_prep.dox (#4795)

Co-authored-by: npovey
---
 src/doc/data_prep.dox | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/doc/data_prep.dox b/src/doc/data_prep.dox
index d200025e4b2..8849b4e0f89 100644
--- a/src/doc/data_prep.dox
+++ b/src/doc/data_prep.dox
@@ -109,7 +109,7 @@ get mapped to a word specified in the file data/lang/oov.txt.
 It needs to be the case that when you sort both the utt2spk and spk2utt files,
 the orders "agree", e.g. the list of speaker-ids extracted from the utt2spk file
 is the same as the string sorted order.  The easiest way to make this happen is
-to make the speaker-ids a prefix of the utter Although, in this particular
+to make the speaker-ids a prefix of the utterance.  Although, in this particular
 example we have used an underscore to separate the "speaker" and "utterance"
 parts of the utterance-id, in general it is probably safer to use a dash ("-").
 This is because it has a lower ASCII value; if the speaker-ids vary in length,

From 716f558fa522c417ffa3ff0232df7098bfc5f5db Mon Sep 17 00:00:00 2001
From: Kavya Manohar
Date: Tue, 20 Sep 2022 11:34:29 +0530
Subject: [PATCH 17/18] Allow passing the subword separator as an argument to
 make_position_dependent_subword_lexicon.py (#4794)

---
 egs/wsj/s5/utils/subword/prepare_lang_subword.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/wsj/s5/utils/subword/prepare_lang_subword.sh b/egs/wsj/s5/utils/subword/prepare_lang_subword.sh
index 51a0f8c2353..afe87ebb72f 100755
--- a/egs/wsj/s5/utils/subword/prepare_lang_subword.sh
+++ b/egs/wsj/s5/utils/subword/prepare_lang_subword.sh
@@ -163,7 +163,7 @@ if $position_dependent_phones; then
   if "$silprob"; then
     echo "$0: Currently we do not support word-dependent silence probability" && exit 1;
   else
-    utils/lang/make_position_dependent_subword_lexicon.py $srcdir/lexiconp.txt > $tmpdir/lexiconp.txt || exit 1;
+    utils/lang/make_position_dependent_subword_lexicon.py --separator $separator $srcdir/lexiconp.txt > $tmpdir/lexiconp.txt || exit 1;
   fi
 
   # create $tmpdir/phone_map.txt

From f6f4ccaf213f0fe8b26e633a7dc0c802150626a0 Mon Sep 17 00:00:00 2001
From: xu-gaopeng
Date: Mon, 26 Sep 2022 22:42:48 +0800
Subject: [PATCH 18/18] Update run_blstm.sh (#4790)

* Update run_blstm.sh: fix a bug in the aspire run_blstm.sh

* Update egs/aspire/s5/local/nnet3/run_blstm.sh

Co-authored-by: Cy 'kkm' Katsnelson

Co-authored-by: Cy 'kkm' Katsnelson
---
 egs/aspire/s5/local/nnet3/run_blstm.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/aspire/s5/local/nnet3/run_blstm.sh b/egs/aspire/s5/local/nnet3/run_blstm.sh
index 87dde580333..847af67b385 100755
--- a/egs/aspire/s5/local/nnet3/run_blstm.sh
+++ b/egs/aspire/s5/local/nnet3/run_blstm.sh
@@ -89,9 +89,9 @@ if [ $stage -le 7 ]; then
   fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) delay=2 $lstm_opts
 
   fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) delay=-3 $lstm_opts
-  fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward delay=3 $lstm_opts
+  fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) delay=3 $lstm_opts
 
-  output-layer name=output output-delay=$label_delay dim=$num_targets max-change=1.5
+  output-layer name=output input=Append(blstm3-forward, blstm3-backward) include-log-softmax=false output-delay=0 dim=$num_targets max-change=1.5
 EOF
   steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs || exit 1
fi
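The dash-versus-underscore advice in the data_prep.dox hunk of patch 16 above can be checked directly with a C-locale sort, the ordering Kaldi data directories assume. With hypothetical speaker-ids `spk1` and `spk10`:

    # '_' (ASCII 95) sorts after the digits, so spk1's utterances land
    # after spk10's even though spk1 precedes spk10 as a speaker-id:
    printf 'spk10_utt1\nspk1_utt1\n' | LC_ALL=C sort
    #   spk10_utt1
    #   spk1_utt1

    # '-' (ASCII 45) sorts before the digits, so the utterance order and
    # the sorted speaker order stay in agreement:
    printf 'spk10-utt1\nspk1-utt1\n' | LC_ALL=C sort
    #   spk1-utt1
    #   spk10-utt1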