From a3a18e2527f911d5d3425b7caa0911b868d31691 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 30 Aug 2018 17:29:13 -0400 Subject: [PATCH 01/37] adding changes for language modelling --- egs/iam/v2/cmd.sh | 0 egs/iam/v2/local/chain/run_cnn_e2eali.sh | 2 +- .../local/chain/tuning/run_cnn_e2eali_1c.sh | 2 +- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 246 ++++++++++++++++++ egs/iam/v2/local/process_corpus.py | 30 +++ 5 files changed, 278 insertions(+), 2 deletions(-) mode change 100644 => 100755 egs/iam/v2/cmd.sh create mode 100755 egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh create mode 100755 egs/iam/v2/local/process_corpus.py diff --git a/egs/iam/v2/cmd.sh b/egs/iam/v2/cmd.sh old mode 100644 new mode 100755 diff --git a/egs/iam/v2/local/chain/run_cnn_e2eali.sh b/egs/iam/v2/local/chain/run_cnn_e2eali.sh index ad51803ab0e..da731bcb0b1 120000 --- a/egs/iam/v2/local/chain/run_cnn_e2eali.sh +++ b/egs/iam/v2/local/chain/run_cnn_e2eali.sh @@ -1 +1 @@ -tuning/run_cnn_e2eali_1c.sh \ No newline at end of file +tuning/run_cnn_e2eali_1d.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh index ef851c8ae2f..48e0a76dead 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -26,7 +26,7 @@ stage=0 nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1b6 #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a common_egs_dir= reporting_email= diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh new file mode 100755 index 00000000000..d8aa2561f17 --- /dev/null +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +# e2eali_1c is the same as e2eali_1b but has fewer CNN layers, smaller +# l2-regularize, more epochs and uses dropout. + + +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b exp/chain/cnn_e2eali_1c +# System cnn_e2eali_1b cnn_e2eali_1c +# WER 10.33 10.05 +# WER (rescored) 10.10 9.75 +# CER 5.00 4.76 +# CER (rescored) 4.88 4.68 +# Final train prob -0.0428 -0.0317 +# Final valid prob -0.0666 -0.0630 +# Final train prob (xent) -0.9210 -0.5413 +# Final valid prob (xent) -1.0264 -0.7096 +# Parameters 3.98M 5.12M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1c +# exp/chain/cnn_e2eali_1c: num-iters=21 nj=2..4 num-params=5.1M dim=40->392 combine=-0.034->-0.034 (over 1) xent:train/valid[13,20,final]=(-0.953,-0.800,-0.541/-1.03,-0.933,-0.710) logprob:train/valid[13,20,final]=(-0.069,-0.048,-0.032/-0.091,-0.078,-0.063) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1a +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. 
+chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=550 +# training options +srand=0 +remove_egs=true +#lang_decode=data.new/lang +#lang_rescore=data.new/lang_rescore_6g +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g + +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent 
dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=true \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=8 \ + --trainer.frames-per-iter=1500000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi diff --git a/egs/iam/v2/local/process_corpus.py b/egs/iam/v2/local/process_corpus.py new file mode 100755 index 00000000000..9f8e1d275d3 --- /dev/null +++ b/egs/iam/v2/local/process_corpus.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# Copyright 2018 Ashish Arora +# Apache 2.0 +# This script reads valid phones and removes the lines in the corpus +# which have any other phone. 
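+# Usage sketch (corpus path is illustrative, not part of this patch): the
+# script filters stdin to stdout, keeping only lines whose characters (spaces
+# are always allowed) all occur in data/local/phones.txt, e.g.
+#   cat data/local/corpus.txt | local/process_corpus.py > data/local/corpus_filtered.txt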
+ +import os +import sys, io + +phone_file = os.path.join('data/local/phones.txt') +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +phone_dict = dict() +with open(phone_file, 'r', encoding='utf-8') as phone_fh: + for line in phone_fh: + line = line.strip().split()[0] + phone_dict[line] = line + +phone_dict[' '] = ' ' +corpus_text = list() +for line in infile: + text = line.strip() + skip_text = False + for phone in text: + if phone not in phone_dict.keys(): + skip_text = True + break + if not skip_text: + output.write(text+ '\n') + From 91508b5f322b06f5d214a7fa90bb9b375f359252 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 30 Aug 2018 22:35:10 -0400 Subject: [PATCH 02/37] adding modifications for augmentation, topology, shearing, run.sh --- egs/iam/v2/local/augment_data.sh | 33 ++ egs/iam/v2/local/chain/run_cnn_1a.sh | 241 +++++++++ egs/iam/v2/local/chain/run_cnn_chainali_1c.sh | 247 +++++++++ .../local/chain/tuning/run_cnn_e2eali_1d.sh | 13 +- egs/iam/v2/local/extract_features.sh | 48 ++ egs/iam/v2/local/gen_topo.py | 93 ++++ egs/iam/v2/local/make_features.py | 193 ++++++- egs/iam/v2/local/prepare_data.sh | 5 +- egs/iam/v2/local/prepare_lang.sh | 474 ++++++++++++++++++ egs/iam/v2/local/score.sh | 155 +----- egs/iam/v2/run.sh | 174 +++++++ egs/iam/v2/run_end2end.sh | 53 +- egs/wsj/s5/utils/copy_data_dir.sh | 10 +- 13 files changed, 1537 insertions(+), 202 deletions(-) create mode 100755 egs/iam/v2/local/augment_data.sh create mode 100755 egs/iam/v2/local/chain/run_cnn_1a.sh create mode 100755 egs/iam/v2/local/chain/run_cnn_chainali_1c.sh create mode 100755 egs/iam/v2/local/extract_features.sh create mode 100755 egs/iam/v2/local/gen_topo.py create mode 100755 egs/iam/v2/local/prepare_lang.sh create mode 100755 egs/iam/v2/run.sh diff --git a/egs/iam/v2/local/augment_data.sh b/egs/iam/v2/local/augment_data.sh new file mode 100755 index 00000000000..443a16874f2 --- /dev/null +++ b/egs/iam/v2/local/augment_data.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2018 Hossein Hadian +# 2018 Ashish Arora + +# Apache 2.0 +# This script performs data augmentation. + +nj=4 +cmd=run.pl +feat_dim=40 +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +srcdir=$1 +outdir=$2 +datadir=$3 + +mkdir -p $datadir/augmentations +echo "copying $srcdir to $datadir/augmentations/aug1" +utils/copy_data_dir.sh --spk-prefix aug1- --utt-prefix aug1- $srcdir $datadir/augmentations/aug1 + +echo " copying allowed length for training with augmented data..." 
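+# allowed_lengths.txt holds one permitted feature length (image width in
+# frames) per line; reusing the original file keeps the augmented copy padded
+# to the same set of lengths as the source data.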
+cat $srcdir/allowed_lengths.txt > $datadir/augmentations/aug1/allowed_lengths.txt + +echo " Extracting features, creating feats.scp file for augmentated data" +local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim --fliplr false --augment true $datadir/augmentations/aug1 + +echo " combine original data and data from different augmentations" +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1 +cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/iam/v2/local/chain/run_cnn_1a.sh b/egs/iam/v2/local/chain/run_cnn_1a.sh new file mode 100755 index 00000000000..41a76920e37 --- /dev/null +++ b/egs/iam/v2/local/chain/run_cnn_1a.sh @@ -0,0 +1,241 @@ +#!/bin/bash + +# Copyright 2017 Hossein Hadian +# 2017 Chun Chieh Chang +# 2017 Ashish Arora + +# steps/info/chain_dir_info.pl exp/chain/cnn_1a/ +# exp/chain/cnn_1a/: num-iters=21 nj=2..4 num-params=4.4M dim=40->364 combine=-0.021->-0.015 xent:train/valid[13,20,final]=(-1.05,-0.701,-0.591/-1.30,-1.08,-1.00) logprob:train/valid[13,20,final]=(-0.061,-0.034,-0.030/-0.107,-0.101,-0.098) + +# local/chain/compare_wer.sh exp/chain/cnn_1a/ +# System cnn_1a +# WER 18.52 +# CER 10.07 +# Final train prob -0.0077 +# Final valid prob -0.0970 +# Final train prob (xent) -0.5484 +# Final valid prob (xent) -0.9643 +# Parameters 4.36M + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +ali=tri3_ali +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +alignment_subsampling_factor=1 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=450 +# training options +srand=0 +remove_egs=false +lang_test=lang_unk +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$cmd" ${train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn4 dim=$tdnn_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=$frame_subsampling_factor \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=1000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=64,32 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
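+ # For example (lang/graph names purely illustrative), a second graph for an
+ # external LM could be built alongside the default one:
+ #   utils/mkgraph.sh --self-loop-scale 1.0 data/lang_bigG $dir $dir/graph_bigG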
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi diff --git a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh b/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh new file mode 100755 index 00000000000..54c52d913de --- /dev/null +++ b/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh @@ -0,0 +1,247 @@ +#!/bin/bash + +# chainali_1c is as chainali_1b except it uses l2-regularize +# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b exp/chain/cnn_chainali_1c +# System cnn_chainali_1b cnn_chainali_1c +# WER 14.38 12.72 +# CER 7.14 5.99 +# Final train prob -0.0113 -0.0291 +# Final valid prob -0.0400 -0.0359 +# Final train prob (xent) -0.6043 -0.9781 +# Final valid prob (xent) -0.9030 -1.1544 +# Parameters 3.96M 3.96M + +# steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1c +# exp/chain/cnn_chainali_1c: num-iters=21 nj=2..4 num-params=4.0M dim=40->369 combine=-0.007->-0.007 (over 1) xent:train/valid[13,20,final]=(-1.44,-1.05,-0.997/-1.53,-1.19,-1.15) logprob:train/valid[13,20,final]=(-0.056,-0.020,-0.012/-0.056,-0.025,-0.020) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +ali=tri3_ali +chain_model_dir=exp/chain${nnet3_affix}/cnn_1a +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=450 +# training options +srand=0 +remove_egs=false +lang_test=lang_unk +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $chain_model_dir $lat_dir + cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=1000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=64,32 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
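+ # A rough sanity check of that compatibility (illustrative; ignores the #N
+ # disambiguation symbols, which may legitimately differ):
+ #   diff <(grep -v '^#' data/lang/phones.txt) <(grep -v '^#' data/$lang_test/phones.txt)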
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index d8aa2561f17..6ab74dc2f0d 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -45,8 +45,6 @@ tdnn_dim=550 # training options srand=0 remove_egs=true -#lang_decode=data.new/lang -#lang_rescore=data.new/lang_rescore_6g lang_decode=data/lang lang_rescore=data/lang_rescore_6g @@ -71,7 +69,6 @@ fi ali_dir=exp/chain/e2e_ali_train lat_dir=exp/chain${nnet3_affix}/e2e_${train_set}_lats dir=exp/chain${nnet3_affix}/cnn_e2eali${affix} -#dir=exp/chain/cnn_e2eali_1c train_data_dir=data/${train_set} tree_dir=exp/chain${nnet3_affix}/tree_e2e @@ -164,6 +161,16 @@ if [ $stage -le 4 ]; then ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts EOF diff --git a/egs/iam/v2/local/extract_features.sh b/egs/iam/v2/local/extract_features.sh new file mode 100755 index 00000000000..1741ad3f9b2 --- /dev/null +++ b/egs/iam/v2/local/extract_features.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment=false +fliplr=false +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + local/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \ + --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/iam/v2/local/gen_topo.py b/egs/iam/v2/local/gen_topo.py new file mode 100755 index 00000000000..a74c6d4bbae --- /dev/null +++ b/egs/iam/v2/local/gen_topo.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# Copyright 2017 (author: Chun-Chieh Chang) + +# Generate a topology file. This allows control of the number of states in the +# non-silence HMMs, and in the silence HMMs. This is a modified version of +# 'utils/gen_topo.pl'. The difference is that this creates two topologies for +# the non-silence HMMs. The number of states for punctuations is different than +# the number of states for other characters. + +from __future__ import print_function +import argparse +import string + +parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " + " " + "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", + epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); +parser.add_argument("num_nonsil_states", type=int, help="number of states for nonsilence phones"); +parser.add_argument("num_sil_states", type=int, help="number of states for silence phones"); +parser.add_argument("num_punctuation_states", type=int, help="number of states for punctuation"); +parser.add_argument("nonsilence_phones", type=str, + help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); +parser.add_argument("silence_phones", type=str, + help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); +parser.add_argument("phone_list", type=str, help="file containing all phones and their corresponding number."); + +args = parser.parse_args() + +silence_phones = [ int(x) for x in args.silence_phones.split(":") ] +nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] +all_phones = silence_phones + nonsilence_phones + +punctuation_phones = [] +exclude = set(string.punctuation) +with open(args.phone_list) as f: + for line in f: + line = line.strip() + phone = line.split('_')[0] + if len(phone) == 1 and phone in exclude: + punctuation_phones.append(int(line.split(' ')[1])) + +# For nonsilence phones that are not punctuations +print("") +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x not in punctuation_phones])) +print("") +for x in range(0, args.num_nonsil_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_nonsil_states) + " ") +print("") + +# For nonsilence phones that ar punctuations +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x in punctuation_phones])) +print("") +for x in range(0, args.num_punctuation_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_punctuation_states) + " ") +print("") + +# For silence phones +print("") +print("") +print(" ".join([str(x) for x in silence_phones])) +print("") +if(args.num_sil_states > 1): + transp = 1.0 / (args.num_sil_states - 1) + + state_str = " 0 0 " + for x in range(0, (args.num_sil_states - 1)): + state_str = state_str + " " + str(x) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + + for x in range(1, (args.num_sil_states - 1)): + state_str = " " + str(x) + " " + str(x) + " " + for y in range(1, args.num_sil_states): + state_str = state_str + " " + str(y) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + second_last = args.num_sil_states - 1 + print(" " + str(second_last) + " " + str(second_last) + " " + str(second_last) + " 0.75 " + str(args.num_sil_states) + " 0.25 ") + print(" " + str(args.num_sil_states) + " ") +else: + print(" 0 0 0 0.75 1 0.25 ") + print(" " + str(args.num_sil_states) + " ") +print("") +print("") diff --git a/egs/iam/v2/local/make_features.py b/egs/iam/v2/local/make_features.py index 84e012daedb..3ce501732cf 100755 --- a/egs/iam/v2/local/make_features.py +++ b/egs/iam/v2/local/make_features.py @@ -2,6 +2,7 @@ # Copyright 2017 Chun Chieh Chang # 2017 Ashish Arora +# 2017 Yiwen Shao # 2018 Hossein Hadian """ This script converts images to Kaldi-format feature matrices. The input to @@ -14,20 +15,27 @@ to enforce the images to have the specified length in that file by padding white pixels (the --padding option will be ignored in this case). This relates to end2end chain training. - eg. 
local/make_features.py data/train --feat-dim 40 """ - +import random import argparse import os import sys +import scipy.io as sio import numpy as np from scipy import misc +from scipy.ndimage.interpolation import affine_transform +import math +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and writes them to standard output in text format.""") -parser.add_argument('dir', type=str, - help='Source data directory (containing images.scp)') +parser.add_argument('images_scp_path', type=str, + help='Path of images.scp file') +parser.add_argument('--allowed_len_file_path', type=str, default=None, + help='If supplied, each images will be padded to reach the ' + 'target length (this overrides --padding).') parser.add_argument('--out-ark', type=str, default='-', help='Where to write the output feature file') parser.add_argument('--feat-dim', type=int, default=40, @@ -35,8 +43,10 @@ parser.add_argument('--padding', type=int, default=5, help='Number of white pixels to pad on the left' 'and right side of the image.') - - +parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False, + help="Flip the image left-right for right to left languages") +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() @@ -56,18 +66,12 @@ def write_kaldi_matrix(file_handle, matrix, key): file_handle.write("\n") file_handle.write(" ]\n") -def get_scaled_image(im, allowed_lengths = None): - scale_size = args.feat_dim - sx = im.shape[1] - sy = im.shape[0] - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) + +def horizontal_pad(im, allowed_lengths = None): if allowed_lengths is None: left_padding = right_padding = args.padding else: # Find an allowed length for the image - imlen = im.shape[1] + imlen = im.shape[1] # width allowed_len = 0 for l in allowed_lengths: if l > imlen: @@ -77,28 +81,153 @@ def get_scaled_image(im, allowed_lengths = None): # No allowed length was found for the image (the image is too long) return None padding = allowed_len - imlen - left_padding = padding // 2 + left_padding = int(padding // 2) right_padding = padding - left_padding - dim_y = im.shape[0] + dim_y = im.shape[0] # height im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), dtype=int), im), axis=1) im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), dtype=int)), axis=1) return im_pad1 -### main ### -data_list_path = os.path.join(args.dir, 'images.scp') +def get_scaled_image_aug(im, mode='normal'): + scale_size = args.feat_dim + sx = im.shape[1] + sy = im.shape[0] + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + scale_size = random.randint(10, 30) + scale = (1.0 * scale_size) / sy + down_nx = int(scale_size) + down_ny = int(scale * sx) + if mode == 'normal': + im = misc.imresize(im, (nx, ny)) + return im + else: + im_scaled_down = misc.imresize(im, (down_nx, down_ny)) + im_scaled_up = misc.imresize(im_scaled_down, (nx, ny)) + return im_scaled_up + return im + +def contrast_normalization(im, low_pct, high_pct): + element_number = im.size + rows = im.shape[0] + cols = im.shape[1] + im_contrast = np.zeros(shape=im.shape) + low_index = int(low_pct * element_number) + high_index = int(high_pct * element_number) + sorted_im = np.sort(im, axis=None) + low_thred = 
sorted_im[low_index] + high_thred = sorted_im[high_index] + for i in range(rows): + for j in range(cols): + if im[i, j] > high_thred: + im_contrast[i, j] = 255 # lightest to white + elif im[i, j] < low_thred: + im_contrast[i, j] = 0 # darkest to black + else: + # linear normalization + im_contrast[i, j] = (im[i, j] - low_thred) * \ + 255 / (high_thred - low_thred) + return im_contrast + + +def geometric_moment(frame, p, q): + m = 0 + for i in range(frame.shape[1]): + for j in range(frame.shape[0]): + m += (i ** p) * (j ** q) * frame[i][i] + return m + + +def central_moment(frame, p, q): + u = 0 + x_bar = geometric_moment(frame, 1, 0) / \ + geometric_moment(frame, 0, 0) # m10/m00 + y_bar = geometric_moment(frame, 0, 1) / \ + geometric_moment(frame, 0, 0) # m01/m00 + for i in range(frame.shape[1]): + for j in range(frame.shape[0]): + u += ((i - x_bar)**p) * ((j - y_bar)**q) * frame[i][j] + return u + + +def height_normalization(frame, w, h): + frame_normalized = np.zeros(shape=(h, w)) + alpha = 4 + x_bar = geometric_moment(frame, 1, 0) / \ + geometric_moment(frame, 0, 0) # m10/m00 + y_bar = geometric_moment(frame, 0, 1) / \ + geometric_moment(frame, 0, 0) # m01/m00 + sigma_x = (alpha * ((central_moment(frame, 2, 0) / + geometric_moment(frame, 0, 0)) ** .5)) # alpha * sqrt(u20/m00) + sigma_y = (alpha * ((central_moment(frame, 0, 2) / + geometric_moment(frame, 0, 0)) ** .5)) # alpha * sqrt(u02/m00) + for x in range(w): + for y in range(h): + i = int((x / w - 0.5) * sigma_x + x_bar) + j = int((y / h - 0.5) * sigma_y + y_bar) + frame_normalized[x][y] = frame[i][j] + return frame_normalized + +def find_slant_project(im): + rows = im.shape[0] + cols = im.shape[1] + std_max = 0 + alpha_max = 0 + col_disp = np.zeros(90, int) + proj = np.zeros(shape=(90, cols + 2 * rows), dtype=int) + for r in range(rows): + for alpha in range(-45, 45, 1): + col_disp[alpha] = int(r * math.tan(alpha / 180.0 * math.pi)) + for c in range(cols): + if im[r, c] < 100: + for alpha in range(-45, 45, 1): + proj[alpha + 45, c + col_disp[alpha] + rows] += 1 + for alpha in range(-45, 45, 1): + proj_histogram, bin_array = np.histogram(proj[alpha + 45, :], bins=10) + proj_std = np.std(proj_histogram) + if proj_std > std_max: + std_max = proj_std + alpha_max = alpha + proj_std = np.std(proj, axis=1) + return -alpha_max + + +def horizontal_shear(im, degree): + rad = degree / 180.0 * math.pi + padding_x = int(abs(np.tan(rad)) * im.shape[0]) + padding_y = im.shape[0] + if rad > 0: + im_pad = np.concatenate( + (255 * np.ones((padding_y, padding_x), dtype=int), im), axis=1) + elif rad < 0: + im_pad = np.concatenate( + (im, 255 * np.ones((padding_y, padding_x), dtype=int)), axis=1) + else: + im_pad = im + shear_matrix = np.array([[1, 0], + [np.tan(rad), 1]]) + sheared_im = affine_transform(im_pad, shear_matrix, cval=255.0) + return sheared_im + + +### main ### +random.seed(1) +data_list_path = args.images_scp_path if args.out_ark == '-': out_fh = sys.stdout else: - out_fh = open(args.out_ark,'wb') + out_fh = open(args.out_ark,'w') allowed_lengths = None -if os.path.isfile(os.path.join(args.dir, 'allowed_lengths.txt')): +allowed_len_handle = args.allowed_len_file_path +if os.path.isfile(allowed_len_handle): print("Found 'allowed_lengths.txt' file...", file=sys.stderr) allowed_lengths = [] - with open(os.path.join(args.dir,'allowed_lengths.txt')) as f: + with open(allowed_len_handle) as f: for line in f: allowed_lengths.append(int(line.strip())) print("Read {} allowed lengths and will apply them to the " @@ -106,6 +235,7 @@ def 
get_scaled_image(im, allowed_lengths = None): num_fail = 0 num_ok = 0 +aug_setting = ['normal', 'scaled'] with open(data_list_path) as f: for line in f: line = line.strip() @@ -113,15 +243,24 @@ def get_scaled_image(im, allowed_lengths = None): image_id = line_vect[0] image_path = line_vect[1] im = misc.imread(image_path) - im_scaled = get_scaled_image(im, allowed_lengths) - - if im_scaled is None: + if args.fliplr: + im = np.fliplr(im) + if args.augment: + im_aug = get_scaled_image_aug(im, aug_setting[0]) + im_contrast = contrast_normalization(im_aug, 0.05, 0.2) + slant_degree = find_slant_project(im_contrast) + im_sheared = horizontal_shear(im_contrast, slant_degree) + im_aug = im_sheared + else: + im_aug = get_scaled_image_aug(im, aug_setting[0]) + im_horizontal_padded = horizontal_pad(im_aug, allowed_lengths) + if im_horizontal_padded is None: num_fail += 1 continue - data = np.transpose(im_scaled, (1, 0)) + data = np.transpose(im_horizontal_padded, (1, 0)) data = np.divide(data, 255.0) num_ok += 1 write_kaldi_matrix(out_fh, data, image_id) -print('Generated features for {} images. Failed for {} (iamge too ' +print('Generated features for {} images. Failed for {} (image too ' 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh index 73d711c73f0..9c01ac90f28 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -165,6 +165,7 @@ if [ $stage -le 0 ]; then local/process_data.py data/local data/test --dataset test || exit 1 local/process_data.py data/local data/val --dataset validation || exit 1 - utils/utt2spk_to_spk2utt.pl data/train/utt2spk > data/train/spk2utt - utils/utt2spk_to_spk2utt.pl data/test/utt2spk > data/test/spk2utt + image/fix_data_dir.sh data/train + image/fix_data_dir.sh data/test + image/fix_data_dir.sh data/val fi diff --git a/egs/iam/v2/local/prepare_lang.sh b/egs/iam/v2/local/prepare_lang.sh new file mode 100755 index 00000000000..cc6bc03a432 --- /dev/null +++ b/egs/iam/v2/local/prepare_lang.sh @@ -0,0 +1,474 @@ +#!/bin/bash +# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); +# Arnab Ghoshal +# 2014 Guoguo Chen +# 2015 Hainan Xu +# 2016 FAU Erlangen (Author: Axel Horndasch) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This script prepares a directory such as data/lang/, in the standard format, +# given a source directory containing a dictionary lexicon.txt in a form like: +# word phone1 phone2 ... phoneN +# per line (alternate prons would be separate lines), or a dictionary with probabilities +# called lexiconp.txt in a form: +# word pron-prob phone1 phone2 ... phoneN +# (with 0.0 < pron-prob <= 1.0); note: if lexiconp.txt exists, we use it even if +# lexicon.txt exists. 
+# and also files silence_phones.txt, nonsilence_phones.txt, optional_silence.txt +# and extra_questions.txt +# Here, silence_phones.txt and nonsilence_phones.txt are lists of silence and +# non-silence phones respectively (where silence includes various kinds of +# noise, laugh, cough, filled pauses etc., and nonsilence phones includes the +# "real" phones.) +# In each line of those files is a list of phones, and the phones on each line +# are assumed to correspond to the same "base phone", i.e. they will be +# different stress or tone variations of the same basic phone. +# The file "optional_silence.txt" contains just a single phone (typically SIL) +# which is used for optional silence in the lexicon. +# extra_questions.txt might be empty; typically will consist of lists of phones, +# all members of each list with the same stress or tone; and also possibly a +# list for the silence phones. This will augment the automatically generated +# questions (note: the automatically generated ones will treat all the +# stress/tone versions of a phone the same, so will not "get to ask" about +# stress or tone). +# + +# This script adds word-position-dependent phones and constructs a host of other +# derived files, that go in data/lang/. + +# Begin configuration section. +num_sil_states=5 +num_nonsil_states=3 +position_dependent_phones=true +# position_dependent_phones is false also when position dependent phones and word_boundary.txt +# have been generated by another source +share_silence_phones=false # if true, then share pdfs of different silence + # phones together. +sil_prob=0.5 +unk_fst= # if you want to model the unknown-word () + # with a phone-level LM as created by make_unk_lm.sh, + # provide the text-form FST via this flag, e.g. /unk_fst.txt + # where was the 2nd argument of make_unk_lm.sh. +phone_symbol_table= # if set, use a specified phones.txt file. +extra_word_disambig_syms= # if set, add disambiguation symbols from this file (one per line) + # to phones/disambig.txt, phones/wdisambig.txt and words.txt +num_extra_phone_disambig_syms=1 # Standard one phone disambiguation symbol is used for optional silence. + # Increasing this number does not harm, but is only useful if you later + # want to introduce this labels to L_disambig.fst +# end configuration sections + +echo "$0 $@" # Print the command line for logging + +. utils/parse_options.sh + +if [ $# -ne 4 ]; then + echo "usage: utils/prepare_lang.sh " + echo "e.g.: utils/prepare_lang.sh data/local/dict data/local/lang data/lang" + echo " should contain the following files:" + echo " extra_questions.txt lexicon.txt nonsilence_phones.txt optional_silence.txt silence_phones.txt" + echo "See http://kaldi-asr.org/doc/data_prep.html#data_prep_lang_creating for more info." + echo "options: " + echo " --num-sil-states # default: 5, #states in silence models." + echo " --num-nonsil-states # default: 3, #states in non-silence models." + echo " --position-dependent-phones (true|false) # default: true; if true, use _B, _E, _S & _I" + echo " # markers on phones to indicate word-internal positions. " + echo " --share-silence-phones (true|false) # default: false; if true, share pdfs of " + echo " # all non-silence phones. " + echo " --sil-prob # default: 0.5 [must have 0 <= silprob < 1]" + echo " --phone-symbol-table # default: \"\"; if not empty, use the provided " + echo " # phones.txt as phone symbol table. This is useful " + echo " # if you use a new dictionary for the existing setup." + echo " --unk-fst # default: none. e.g. 
exp/make_unk_lm/unk_fst.txt." + echo " # This is for if you want to model the unknown word" + echo " # via a phone-level LM rather than a special phone" + echo " # (this should be more useful for test-time than train-time)." + echo " --extra-word-disambig-syms # default: \"\"; if not empty, add disambiguation symbols" + echo " # from this file (one per line) to phones/disambig.txt," + echo " # phones/wdisambig.txt and words.txt" + exit 1; +fi + +srcdir=$1 +oov_word=$2 +tmpdir=$3 +dir=$4 +mkdir -p $dir $tmpdir $dir/phones + +silprob=false +[ -f $srcdir/lexiconp_silprob.txt ] && silprob=true + +[ -f path.sh ] && . ./path.sh + +! utils/validate_dict_dir.pl $srcdir && \ + echo "*Error validating directory $srcdir*" && exit 1; + +if [[ ! -f $srcdir/lexicon.txt ]]; then + echo "**Creating $dir/lexicon.txt from $dir/lexiconp.txt" + perl -ape 's/(\S+\s+)\S+\s+(.+)/$1$2/;' < $srcdir/lexiconp.txt > $srcdir/lexicon.txt || exit 1; +fi +if [[ ! -f $srcdir/lexiconp.txt ]]; then + echo "**Creating $srcdir/lexiconp.txt from $srcdir/lexicon.txt" + perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $srcdir/lexiconp.txt || exit 1; +fi + +if [ ! -z "$unk_fst" ] && [ ! -f "$unk_fst" ]; then + echo "$0: expected --unk-fst $unk_fst to exist as a file" + exit 1 +fi + +if ! utils/validate_dict_dir.pl $srcdir >&/dev/null; then + utils/validate_dict_dir.pl $srcdir # show the output. + echo "Validation failed (second time)" + exit 1; +fi + +# phones.txt file provided, we will do some sanity check here. +if [[ ! -z $phone_symbol_table ]]; then + # Checks if we have position dependent phones + n1=`cat $phone_symbol_table | grep -v -E "^#[0-9]+$" | cut -d' ' -f1 | sort -u | wc -l` + n2=`cat $phone_symbol_table | grep -v -E "^#[0-9]+$" | cut -d' ' -f1 | sed 's/_[BIES]$//g' | sort -u | wc -l` + $position_dependent_phones && [ $n1 -eq $n2 ] &&\ + echo "$0: Position dependent phones requested, but not in provided phone symbols" && exit 1; + ! $position_dependent_phones && [ $n1 -ne $n2 ] &&\ + echo "$0: Position dependent phones not requested, but appear in the provided phones.txt" && exit 1; + + # Checks if the phone sets match. + cat $srcdir/{,non}silence_phones.txt | awk -v f=$phone_symbol_table ' + BEGIN { while ((getline < f) > 0) { sub(/_[BEIS]$/, "", $1); phones[$1] = 1; }} + { for (x = 1; x <= NF; ++x) { if (!($x in phones)) { + print "Phone appears in the lexicon but not in the provided phones.txt: "$x; exit 1; }}}' || exit 1; +fi + +# In case there are extra word-level disambiguation symbols we need +# to make sure that all symbols in the provided file are valid. +if [ ! -z "$extra_word_disambig_syms" ]; then + if ! utils/lang/validate_disambig_sym_file.pl --allow-numeric "false" $extra_word_disambig_syms; then + echo "$0: Validation of disambiguation file \"$extra_word_disambig_syms\" failed." + exit 1; + fi +fi + +if $position_dependent_phones; then + # Create $tmpdir/lexiconp.txt from $srcdir/lexiconp.txt (or + # $tmpdir/lexiconp_silprob.txt from $srcdir/lexiconp_silprob.txt) by + # adding the markers _B, _E, _S, _I depending on word position. + # In this recipe, these markers apply to silence also. + # Do this starting from lexiconp.txt only. 
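+ # For example, a lexiconp.txt entry "act 1.0 a c t" becomes
+ # "act 1.0 a_B c_I t_E", and a single-phone entry "a 1.0 a" becomes "a 1.0 a_S".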
+ if "$silprob"; then + perl -ane '@A=split(" ",$_); $w = shift @A; $p = shift @A; $silword_p = shift @A; + $wordsil_f = shift @A; $wordnonsil_f = shift @A; @A>0||die; + if(@A==1) { print "$w $p $silword_p $wordsil_f $wordnonsil_f $A[0]_S\n"; } + else { print "$w $p $silword_p $wordsil_f $wordnonsil_f $A[0]_B "; + for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \ + < $srcdir/lexiconp_silprob.txt > $tmpdir/lexiconp_silprob.txt + else + perl -ane '@A=split(" ",$_); $w = shift @A; $p = shift @A; @A>0||die; + if(@A==1) { print "$w $p $A[0]_S\n"; } else { print "$w $p $A[0]_B "; + for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \ + < $srcdir/lexiconp.txt > $tmpdir/lexiconp.txt || exit 1; + fi + + # create $tmpdir/phone_map.txt + # this has the format (on each line) + # ... + # where the versions depend on the position of the phone within a word. + # For instance, we'd have: + # AA AA_B AA_E AA_I AA_S + # for (B)egin, (E)nd, (I)nternal and (S)ingleton + # and in the case of silence + # SIL SIL SIL_B SIL_E SIL_I SIL_S + # [because SIL on its own is one of the variants; this is for when it doesn't + # occur inside a word but as an option in the lexicon.] + + # This phone map expands the phone lists into all the word-position-dependent + # versions of the phone lists. + cat <(set -f; for x in `cat $srcdir/silence_phones.txt`; do for y in "" "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \ + <(set -f; for x in `cat $srcdir/nonsilence_phones.txt`; do for y in "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \ + > $tmpdir/phone_map.txt +else + if "$silprob"; then + cp $srcdir/lexiconp_silprob.txt $tmpdir/lexiconp_silprob.txt + else + cp $srcdir/lexiconp.txt $tmpdir/lexiconp.txt + fi + cat $srcdir/silence_phones.txt $srcdir/nonsilence_phones.txt | \ + awk '{for(n=1;n<=NF;n++) print $n; }' > $tmpdir/phones + paste -d' ' $tmpdir/phones $tmpdir/phones > $tmpdir/phone_map.txt +fi +mkdir -p $dir/phones # various sets of phones... +# Sets of phones for use in clustering, and making monophone systems. +if $share_silence_phones; then + # build a roots file that will force all the silence phones to share the + # same pdf's. [three distinct states, only the transitions will differ.] + # 'shared'/'not-shared' means, do we share the 3 states of the HMM + # in the same tree-root? + # Sharing across models(phones) is achieved by writing several phones + # into one line of roots.txt (shared/not-shared doesn't affect this). + # 'not-shared not-split' means we have separate tree roots for the 3 states, + # but we never split the tree so they remain stumps, + # so all phones in the line correspond to the same model. + cat $srcdir/silence_phones.txt | awk '{printf("%s ", $0); } END{printf("\n");}' | cat - $srcdir/nonsilence_phones.txt | \ + utils/apply_map.pl $tmpdir/phone_map.txt > $dir/phones/sets.txt + cat $dir/phones/sets.txt | \ + awk '{if(NR==1) print "not-shared", "not-split", $0; else print "shared", "split", $0;}' > $dir/phones/roots.txt +else + # different silence phones will have different GMMs. [note: here, all "shared split" means + # is that we may have one GMM for all the states, or we can split on states. because they're + # context-independent phones, they don't see the context.] 
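+ # e.g. with position-dependent phones, the resulting roots.txt lines look like:
+ #   shared split SIL SIL_B SIL_E SIL_I SIL_S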
+ cat $srcdir/{,non}silence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt > $dir/phones/sets.txt
+ cat $dir/phones/sets.txt | awk '{print "shared", "split", $0;}' > $dir/phones/roots.txt
+fi
+cat $srcdir/silence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \
+ awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/silence.txt
+cat $srcdir/nonsilence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \
+ awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/nonsilence.txt
+cp $srcdir/optional_silence.txt $dir/phones/optional_silence.txt
+cp $dir/phones/silence.txt $dir/phones/context_indep.txt
+# if extra_questions.txt is empty, it's OK.
+cat $srcdir/extra_questions.txt 2>/dev/null | utils/apply_map.pl $tmpdir/phone_map.txt \
+ >$dir/phones/extra_questions.txt
+# Want extra questions about the word-start/word-end stuff. Make it separate for
+# silence and non-silence. Probably doesn't matter, as silence will rarely
+# be inside a word.
+if $position_dependent_phones; then
+ for suffix in _B _E _I _S; do
+ (set -f; for x in `cat $srcdir/nonsilence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt
+ done
+ for suffix in "" _B _E _I _S; do
+ (set -f; for x in `cat $srcdir/silence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt
+ done
+fi
+# add_lex_disambig.pl is responsible for adding disambiguation symbols to
+# the lexicon, for telling us how many disambiguation symbols it used,
+# and also for modifying the unknown-word's pronunciation (if the
+# --unk-fst was provided) to the sequence "#1 #2 #3", and reserving those
+# disambig symbols for that purpose.
+# The #2 will later be replaced with the actual unk model. The reason
+# for the #1 and the #3 is for disambiguation and also to keep the
+# FST compact. If we didn't have the #1, we might have a different copy of
+# the unk-model FST, or at least some of its arcs, for each start-state from
+# which an <unk> transition comes (instead of per end-state, which is more compact);
+# and adding the #3 prevents us from potentially having 2 copies of the unk-model
+# FST due to the optional-silence [the last phone of any word gets 2 arcs].
+if [ ! -z "$unk_fst" ]; then # if the --unk-fst option was provided...
+ if "$silprob"; then
+ utils/lang/internal/modify_unk_pron.py $tmpdir/lexiconp_silprob.txt "$oov_word" || exit 1
+ else
+ utils/lang/internal/modify_unk_pron.py $tmpdir/lexiconp.txt "$oov_word" || exit 1
+ fi
+ unk_opt="--first-allowed-disambig 4"
+else
+ unk_opt=
+fi
+if "$silprob"; then
+ ndisambig=$(utils/add_lex_disambig.pl $unk_opt --pron-probs --sil-probs $tmpdir/lexiconp_silprob.txt $tmpdir/lexiconp_silprob_disambig.txt)
+else
+ ndisambig=$(utils/add_lex_disambig.pl $unk_opt --pron-probs $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt)
+fi
+ndisambig=$[$ndisambig+$num_extra_phone_disambig_syms]; # add (at least) one disambig symbol for silence in lexicon FST.
+echo $ndisambig > $tmpdir/lex_ndisambig
+# Format of lexiconp_disambig.txt:
+# !SIL 1.0 SIL_S
+# <SPOKEN_NOISE> 1.0 SPN_S #1
+# <UNK> 1.0 SPN_S #2
+# <NOISE> 1.0 NSN_S
+# !EXCLAMATION-POINT 1.0 EH2_B K_I S_I K_I L_I AH0_I M_I EY1_I SH_I AH0_I N_I P_I OY2_I N_I T_E
+( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) >$dir/phones/disambig.txt
+# In case there are extra word-level disambiguation symbols they also
+# need to be added to the list of phone-level disambiguation symbols.
+if [ ! -z "$extra_word_disambig_syms" ]; then
+ # We expect a file containing valid word-level disambiguation symbols.
+ cat $extra_word_disambig_syms | awk '{ print $1 }' >> $dir/phones/disambig.txt
+fi
+# Create phone symbol table.
+if [[ ! -z $phone_symbol_table ]]; then
+ start_symbol=`grep \#0 $phone_symbol_table | awk '{print $2}'`
+ echo "<eps>" | cat - $dir/phones/{silence,nonsilence}.txt | awk -v f=$phone_symbol_table '
+ BEGIN { while ((getline < f) > 0) { phones[$1] = $2; }} { print $1" "phones[$1]; }' | sort -k2 -g |\
+ cat - <(cat $dir/phones/disambig.txt | awk -v x=$start_symbol '{n=x+NR-1; print $1, n;}') > $dir/phones.txt
+else
+ echo "<eps>" | cat - $dir/phones/{silence,nonsilence,disambig}.txt | \
+ awk '{n=NR-1; print $1, n;}' > $dir/phones.txt
+fi
+# Create a file that describes the word-boundary information for
+# each phone. 5 categories.
+if $position_dependent_phones; then
+ cat $dir/phones/{silence,nonsilence}.txt | \
+ awk '/_I$/{print $1, "internal"; next;} /_B$/{print $1, "begin"; next; }
+ /_S$/{print $1, "singleton"; next;} /_E$/{print $1, "end"; next; }
+ {print $1, "nonword";} ' > $dir/phones/word_boundary.txt
+else
+ # word_boundary.txt might have been generated by another source
+ [ -f $srcdir/word_boundary.txt ] && cp $srcdir/word_boundary.txt $dir/phones/word_boundary.txt
+fi
+# Create word symbol table.
+# <s> and </s> are only needed due to the need to rescore lattices with
+# ConstArpaLm format language model. They do not normally appear in G.fst or
+# L.fst.
+if "$silprob"; then
+ # remove the silprob
+ cat $tmpdir/lexiconp_silprob.txt |\
+ awk '{
+ for(i=1; i<=NF; i++) {
+ if(i!=3 && i!=4 && i!=5) printf("%s\t", $i); if(i==NF) print "";
+ }
+ }' > $tmpdir/lexiconp.txt
+fi
+cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
+ BEGIN {
+ print "<eps> 0";
+ }
+ {
+ if ($1 == "<s>") {
+ print "<s> is in the vocabulary!" | "cat 1>&2"
+ exit 1;
+ }
+ if ($1 == "</s>") {
+ print "</s> is in the vocabulary!" | "cat 1>&2"
+ exit 1;
+ }
+ printf("%s %d\n", $1, NR);
+ }
+ END {
+ printf("#0 %d\n", NR+1);
+ printf("<s> %d\n", NR+2);
+ printf("</s> %d\n", NR+3);
+ }' > $dir/words.txt || exit 1;
+# In case there are extra word-level disambiguation symbols they also
+# need to be added to words.txt
+if [ ! -z "$extra_word_disambig_syms" ]; then
+ # Since words.txt already exists, we need to extract the current word count.
+ word_count=`tail -n 1 $dir/words.txt | awk '{ print $2 }'`
+ # We expect a file containing valid word-level disambiguation symbols.
+ # The list of symbols is attached to the current words.txt (including
+ # a numeric identifier for each symbol).
+ cat $extra_word_disambig_syms | \
+ awk -v WC=$word_count '{ printf("%s %d\n", $1, ++WC); }' >> $dir/words.txt || exit 1;
+fi
+# format of $dir/words.txt:
+# <eps> 0
+#!EXCLAMATION-POINT 1
+#!SIL 2
+#"CLOSE-QUOTE 3
+#...
+silphone=`cat $srcdir/optional_silence.txt` || exit 1;
+[ -z "$silphone" ] && \
+ ( echo "You have no optional-silence phone; it is required in the current scripts"
+ echo "but you may use the option --sil-prob 0.0 to stop it being used." ) && \
+ exit 1;
+# create $dir/phones/align_lexicon.{txt,int}.
+# This is the method we use for lattice word alignment if we are not
+# using word-position-dependent phones.
+# First remove pron-probs from the lexicon.
+perl -ape 's/(\S+\s+)\S+\s+(.+)/$1$2/;' <$tmpdir/lexiconp.txt >$tmpdir/align_lexicon.txt
+# Note: here, $silphone will have no suffix e.g. _S because it occurs as optional-silence,
+# and is not part of a word.
+[ ! -z "$silphone" ] && echo "<eps> $silphone" >> $tmpdir/align_lexicon.txt
+cat $tmpdir/align_lexicon.txt | \
+ perl -ane '@A = split; print $A[0], " ", join(" ", @A), "\n";' | sort | uniq > $dir/phones/align_lexicon.txt
+# create phones/align_lexicon.int
+cat $dir/phones/align_lexicon.txt | utils/sym2int.pl -f 3- $dir/phones.txt | \
+ utils/sym2int.pl -f 1-2 $dir/words.txt > $dir/phones/align_lexicon.int
+# Create the basic L.fst without disambiguation symbols, for use
+# in training.
+if $silprob; then
+ # Add silence probabilities (models the prob. of silence before and after each
+ # word). On some setups this helps a bit. See utils/dict_dir_add_pronprobs.sh
+ # and where it's called in the example scripts (run.sh).
+ utils/make_lexicon_fst_silprob.pl $tmpdir/lexiconp_silprob.txt $srcdir/silprob.txt $silphone "" | \
+ fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+ --keep_isymbols=false --keep_osymbols=false | \
+ fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
+else
+ utils/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp.txt $sil_prob $silphone | \
+ fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+ --keep_isymbols=false --keep_osymbols=false | \
+ fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
+fi
+# The file oov.txt contains a word that we will map any OOVs to during
+# training.
+echo "$oov_word" > $dir/oov.txt || exit 1;
+cat $dir/oov.txt | utils/sym2int.pl $dir/words.txt >$dir/oov.int || exit 1;
+# integer version of oov symbol, used in some scripts.
+# the file wdisambig.txt contains a (line-by-line) list of the text-form of the
+# disambiguation symbols that are used in the grammar and passed through by the
+# lexicon. At this stage it's hardcoded as '#0', but we're laying the groundwork
+# for more generality (which probably would be added by another script).
+# wdisambig_words.int contains the corresponding list interpreted by the
+# symbol table words.txt, and wdisambig_phones.int contains the corresponding
+# list interpreted by the symbol table phones.txt.
+echo '#0' >$dir/phones/wdisambig.txt
+# In case there are extra word-level disambiguation symbols they need
+# to be added to the existing word-level disambiguation symbols file.
+if [ ! -z "$extra_word_disambig_syms" ]; then
+ # We expect a file containing valid word-level disambiguation symbols.
+ # The regular expression for awk is just a paranoia filter (e.g. for empty lines).
+ cat $extra_word_disambig_syms | awk '{ print $1 }' >> $dir/phones/wdisambig.txt
+fi
+utils/sym2int.pl $dir/phones.txt <$dir/phones/wdisambig.txt >$dir/phones/wdisambig_phones.int
+utils/sym2int.pl $dir/words.txt <$dir/phones/wdisambig.txt >$dir/phones/wdisambig_words.int
+# Create these lists of phones in colon-separated integer list form too,
+# for purposes of being given to programs as command-line options.
+for f in silence nonsilence optional_silence disambig context_indep; do + utils/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt >$dir/phones/$f.int + utils/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt | \ + awk '{printf(":%d", $1);} END{printf "\n"}' | sed s/:// > $dir/phones/$f.csl || exit 1; +done +for x in sets extra_questions; do + utils/sym2int.pl $dir/phones.txt <$dir/phones/$x.txt > $dir/phones/$x.int || exit 1; +done +utils/sym2int.pl -f 3- $dir/phones.txt <$dir/phones/roots.txt \ + > $dir/phones/roots.int || exit 1; +if [ -f $dir/phones/word_boundary.txt ]; then + utils/sym2int.pl -f 1 $dir/phones.txt <$dir/phones/word_boundary.txt \ + > $dir/phones/word_boundary.int || exit 1; +fi +silphonelist=`cat $dir/phones/silence.csl` +nonsilphonelist=`cat $dir/phones/nonsilence.csl` +# Note: it's OK, after generating the 'lang' directory, to overwrite the topo file +# with another one of your choice if the 'topo' file you want can't be generated by +# utils/gen_topo.pl. We do this in the 'chain' recipes. Of course, the 'topo' file +# should cover all the phones. Try running utils/validate_lang.pl to check that +# everything is OK after modifying the topo file. +local/gen_topo.py $num_nonsil_states $num_sil_states 4 $nonsilphonelist $silphonelist $dir/phones.txt >$dir/topo +# Create the lexicon FST with disambiguation symbols, and put it in lang_test. +# There is an extra step where we create a loop to "pass through" the +# disambiguation symbols from G.fst. +if $silprob; then + utils/make_lexicon_fst_silprob.pl $tmpdir/lexiconp_silprob_disambig.txt $srcdir/silprob.txt $silphone '#'$ndisambig | \ + fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \ + --keep_isymbols=false --keep_osymbols=false | \ + fstaddselfloops $dir/phones/wdisambig_phones.int $dir/phones/wdisambig_words.int | \ + fstarcsort --sort_type=olabel > $dir/L_disambig.fst || exit 1; +else + utils/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt $sil_prob $silphone '#'$ndisambig | \ + fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \ + --keep_isymbols=false --keep_osymbols=false | \ + fstaddselfloops $dir/phones/wdisambig_phones.int $dir/phones/wdisambig_words.int | \ + fstarcsort --sort_type=olabel > $dir/L_disambig.fst || exit 1; +fi +if [ ! -z "$unk_fst" ]; then + utils/lang/internal/apply_unk_lm.sh $unk_fst $dir || exit 1 + if ! $position_dependent_phones; then + echo "$0: warning: you are using the --unk-lm option and setting --position-dependent-phones false." + echo " ... this will make it impossible to properly work out the word boundaries after" + echo " ... decoding; quite a few scripts will not work as a result, and many scoring scripts" + echo " ... will die." + sleep 4 + fi +fi +echo "$(basename $0): validating output directory" +! utils/validate_lang.pl $dir && echo "$(basename $0): error validating output" && exit 1; +exit 0; diff --git a/egs/iam/v2/local/score.sh b/egs/iam/v2/local/score.sh index b2032909333..1d84815fc69 100755 --- a/egs/iam/v2/local/score.sh +++ b/egs/iam/v2/local/score.sh @@ -1,155 +1,6 @@ -#!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal) -# Apache 2.0 - -# This script is like steps/scoring/score_kaldi_wer.sh except it transcribes the 's -# using local/unk_arc_post_to_transcription.py and also it calls -# steps/scoring/score_kaldi_cer.sh at the end. - -[ -f ./path.sh ] && . ./path.sh - -# begin configuration section. 
-cmd=run.pl -stage=0 -decode_mbr=false -stats=true -beam=6 -word_ins_penalty=0.0,0.5,1.0 -min_lmwt=3 -max_lmwt=13 -iter=final -#end configuration section. - -echo "$0 $@" # Print the command line for logging -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang_or_graph=$2 -dir=$3 -model_path=`echo $dir |xargs dirname` -symtab=$lang_or_graph/words.txt - -for f in $symtab $dir/lat.1.gz $data/text; do - [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; -done - - -ref_filtering_cmd="cat" -[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter" -[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter" -hyp_filtering_cmd="cat" -[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter" -[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter" - - -if $decode_mbr ; then - echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty" -else - echo "$0: scoring with word insertion penalty=$word_ins_penalty" -fi - - -mkdir -p $dir/scoring_kaldi -cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; -if [ $stage -le 0 ]; then - - for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do - mkdir -p $dir/scoring_kaldi/penalty_$wip/log - - if $decode_mbr ; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ - acwt=\`perl -e \"print 1.0/LMWT\"\`\; \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-mbr-decode --word-symbol-table=$symtab \ - ark:- ark,t:- \| \ - utils/int2sym.pl -f 2- $symtab \| \ - $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; - else - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \ - utils/int2sym.pl -f 2- $symtab \| \ - $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; - fi - - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ - cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ - compute-wer --text --mode=present \ - "ark:cat $dir/scoring_kaldi/test_filt.txt |" ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; - - done -fi - - - -if [ $stage -le 1 ]; then - - for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do - for lmwt in $(seq $min_lmwt $max_lmwt); do - # adding /dev/null to the command list below forces grep to output the filename - grep WER $dir/wer_${lmwt}_${wip} /dev/null - done - done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 - - best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer) - best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') - best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') - - if [ -z 
"$best_lmwt" ]; then - echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." - exit 1; - fi - - if $stats; then - mkdir -p $dir/scoring_kaldi/wer_details - echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight - echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty - - $cmd $dir/scoring_kaldi/log/stats1.log \ - cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ - align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ - utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ - utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; - - $cmd $dir/scoring_kaldi/log/stats2.log \ - cat $dir/scoring_kaldi/wer_details/per_utt \| \ - utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ - sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; - - $cmd $dir/scoring_kaldi/log/wer_bootci.log \ - compute-wer-bootci --mode=present \ - ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ - '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; - - fi -fi - -steps/scoring/score_kaldi_cer.sh --cmd "$cmd" --stage 2 --min-lmwt $min_lmwt \ - --max-lmwt $max_lmwt --word-ins-penalty $word_ins_penalty \ - $data $lang_or_graph $dir +#!/bin/bash -# If we got here, the scoring was successful. -# As a small aid to prevent confusion, we remove all wer_{?,??} files; -# these originate from the previous version of the scoring files -# i keep both statement here because it could lead to confusion about -# the capabilities of the script (we don't do cer in the script) -rm $dir/wer_{?,??} 2>/dev/null -rm $dir/cer_{?,??} 2>/dev/null -exit 0; +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh new file mode 100755 index 00000000000..c74397ccc48 --- /dev/null +++ b/egs/iam/v2/run.sh @@ -0,0 +1,174 @@ +#!/bin/bash + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2017 Hossein Hadian + +set -e +stage=0 +nj=20 +decode_gmm=false +username= +password= +iam_database=/export/corpora5/handwriting_ocr/IAM +wellington_database=/export/corpora5/Wellington/WWC/ + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. + +./local/check_tools.sh + +if [ $stage -le 0 ]; then + echo "$0: Preparing data..." + local/prepare_data.sh --download-dir "$iam_database" \ + --wellington-dir "$wellington_database" \ + --username "$username" --password "$password" +fi +mkdir -p data/{train,test}/data + +if [ $stage -le 1 ]; then + echo "$(date) stage 1: getting allowed image widths for e2e training..." 
+ image/get_image2num_frames.py --feat-dim 40 data/train + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + for set in train test; do + echo "$(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; + done + image/fix_data_dir.sh data/train +fi + +if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; + done +fi + +if [ $stage -le 2 ]; then + echo "$0: Preparing BPE..." + # getting non-silence phones. + cut -d' ' -f2- data/train/text | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/phones.txt + + cut -d' ' -f2- data/train/text > data/local/train_data.txt + cat data/local/phones.txt data/local/train_data.txt | \ + local/prepend_words.py | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + for set in test train val train_aug; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | \ + local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + done +fi + +if [ $stage -le 3 ]; then + echo "$0: Estimating a language model for decoding..." + local/train_lm.sh +fi + +if [ $stage -le 4 ]; then + echo "$0: Preparing dictionary and lang..." 
+ local/prepare_dict.sh + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang + + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g +fi + +if [ $stage -le 4 ]; then + steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train_aug \ + data/lang exp/mono +fi + +if [ $stage -le 5 ] && $decode_gmm; then + utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph + + steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ + exp/mono/decode_test +fi + +if [ $stage -le 6 ]; then + steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ + exp/mono exp/mono_ali + + steps/train_deltas.sh --cmd $cmd 500 20000 data/train_aug data/lang \ + exp/mono_ali exp/tri +fi + +if [ $stage -le 7 ] && $decode_gmm; then + utils/mkgraph.sh data/lang_test exp/tri exp/tri/graph + + steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ + exp/tri/decode_test +fi + +if [ $stage -le 8 ]; then + steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ + exp/tri exp/tri_ali + + steps/train_lda_mllt.sh --cmd $cmd \ + --splice-opts "--left-context=3 --right-context=3" 500 20000 \ + data/train data/lang exp/tri_ali exp/tri2 +fi + +if [ $stage -le 9 ] && $decode_gmm; then + utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph + + steps/decode.sh --nj $nj --cmd $cmd exp/tri2/graph \ + data/test exp/tri2/decode_test +fi + +if [ $stage -le 10 ]; then + steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ + data/train data/lang exp/tri2 exp/tri2_ali + + steps/train_sat.sh --cmd $cmd 500 20000 \ + data/train_aug data/lang exp/tri2_ali exp/tri3 +fi + +if [ $stage -le 11 ] && $decode_gmm; then + utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph + + steps/decode_fmllr.sh --nj $nj --cmd $cmd exp/tri3/graph \ + data/test exp/tri3/decode_test +fi + +if [ $stage -le 12 ]; then + steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ + data/train_aug data/lang exp/tri3 exp/tri3_ali +fi + +if [ $stage -le 13 ]; then + local/chain/run_cnn_1a.sh --train_set train_aug +fi + +if [ $stage -le 14 ]; then + local/chain/run_cnn_chainali_1c.sh --train_set train_aug \ + --chain-model-dir exp/chain/cnn_1a --stage 2 +fi diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index de5c7086ec2..bd78c011b75 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -35,27 +35,50 @@ fi mkdir -p data/{train,test}/data if [ $stage -le 1 ]; then - image/get_image2num_frames.py data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. + echo "$(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - echo "$0: Preparing the test and train feature files..." 
- for dataset in train test; do - local/make_features.py data/$dataset --feat-dim 40 | \ - copy-feats --compress=true --compression-method=7 \ - ark:- ark,scp:data/$dataset/data/images.ark,data/$dataset/feats.scp - steps/compute_cmvn_stats.sh data/$dataset + for set in train test; do + echo "$(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; + done + image/fix_data_dir.sh data/train +fi + +if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done - utils/fix_data_dir.sh data/train fi if [ $stage -le 2 ]; then echo "$0: Preparing BPE..." + # getting non-silence phones. cut -d' ' -f2- data/train/text | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/phones.txt + + cut -d' ' -f2- data/train/text > data/local/train_data.txt + cat data/local/phones.txt data/local/train_data.txt | \ local/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - for set in test train val; do + for set in test train val train_aug; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | \ local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ @@ -73,8 +96,6 @@ fi if [ $stage -le 4 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. - # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang @@ -87,7 +108,7 @@ fi if [ $stage -le 5 ]; then echo "$0: Calling the flat-start chain recipe..." - local/chain/run_e2e_cnn.sh + local/chain/run_e2e_cnn.sh --train_set train_aug fi if [ $stage -le 6 ]; then @@ -100,5 +121,5 @@ fi if [ $stage -le 7 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali.sh + local/chain/run_cnn_e2eali.sh --train_set train_aug fi diff --git a/egs/wsj/s5/utils/copy_data_dir.sh b/egs/wsj/s5/utils/copy_data_dir.sh index f3b885c5e79..996cf76ec33 100755 --- a/egs/wsj/s5/utils/copy_data_dir.sh +++ b/egs/wsj/s5/utils/copy_data_dir.sh @@ -86,10 +86,16 @@ fi if [ -f $srcdir/segments ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - cp $srcdir/wav.scp $destdir + if [ -f $srcdir/wav.scp ]; then + cp $srcdir/wav.scp $destdir + elif [ -f $srcdir/images.scp ]; then + cp $srcdir/images.scp $destdir + fi else # no segments->wav indexed by utt. 
if [ -f $srcdir/wav.scp ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp + elif [ -f $srcdir/images.scp ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp fi fi @@ -126,7 +132,7 @@ rm $destdir/spk_map $destdir/utt_map echo "$0: copied data from $srcdir to $destdir" -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do +for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp images.scp reco2file_and_channel stm glm ctm; do if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" echo " ... $destdir/.backup/$f" From 5f273d6c5548f22f4f1f857d13df6b29986d9a1e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 31 Aug 2018 00:50:46 -0400 Subject: [PATCH 03/37] fixing bugs --- egs/iam/v2/local/chain/run_cnn_1a.sh | 2 +- egs/iam/v2/local/chain/run_cnn_chainali_1c.sh | 2 +- egs/iam/v2/run.sh | 32 +++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/egs/iam/v2/local/chain/run_cnn_1a.sh b/egs/iam/v2/local/chain/run_cnn_1a.sh index 41a76920e37..5e7d5bac77b 100755 --- a/egs/iam/v2/local/chain/run_cnn_1a.sh +++ b/egs/iam/v2/local/chain/run_cnn_1a.sh @@ -46,7 +46,7 @@ tdnn_dim=450 # training options srand=0 remove_egs=false -lang_test=lang_unk +lang_test=lang # End configuration section. echo "$0 $@" # Print the command line for logging diff --git a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh b/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh index 54c52d913de..a17d7307fb4 100755 --- a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh +++ b/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh @@ -43,7 +43,7 @@ tdnn_dim=450 # training options srand=0 remove_egs=false -lang_test=lang_unk +lang_test=lang # End configuration section. 
echo "$0 $@" # Print the command line for logging diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index c74397ccc48..33a78bf41d0 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -6,7 +6,7 @@ set -e stage=0 -nj=20 +nj=70 decode_gmm=false username= password= @@ -101,19 +101,19 @@ if [ $stage -le 4 ]; then data/lang data/lang_rescore_6g fi -if [ $stage -le 4 ]; then +if [ $stage -le 5 ]; then steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train_aug \ data/lang exp/mono fi -if [ $stage -le 5 ] && $decode_gmm; then +if [ $stage -le 6 ] && $decode_gmm; then utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ exp/mono/decode_test fi -if [ $stage -le 6 ]; then +if [ $stage -le 7 ]; then steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ exp/mono exp/mono_ali @@ -121,30 +121,30 @@ if [ $stage -le 6 ]; then exp/mono_ali exp/tri fi -if [ $stage -le 7 ] && $decode_gmm; then - utils/mkgraph.sh data/lang_test exp/tri exp/tri/graph +if [ $stage -le 8 ] && $decode_gmm; then + utils/mkgraph.sh data/lang exp/tri exp/tri/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ exp/tri/decode_test fi -if [ $stage -le 8 ]; then +if [ $stage -le 9 ]; then steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ exp/tri exp/tri_ali steps/train_lda_mllt.sh --cmd $cmd \ --splice-opts "--left-context=3 --right-context=3" 500 20000 \ - data/train data/lang exp/tri_ali exp/tri2 + data/train_aug data/lang exp/tri_ali exp/tri2 fi -if [ $stage -le 9 ] && $decode_gmm; then - utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph +if [ $stage -le 10 ] && $decode_gmm; then + utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri2/graph \ data/test exp/tri2/decode_test fi -if [ $stage -le 10 ]; then +if [ $stage -le 11 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ data/train data/lang exp/tri2 exp/tri2_ali @@ -152,23 +152,23 @@ if [ $stage -le 10 ]; then data/train_aug data/lang exp/tri2_ali exp/tri3 fi -if [ $stage -le 11 ] && $decode_gmm; then - utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph +if [ $stage -le 12 ] && $decode_gmm; then + utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph steps/decode_fmllr.sh --nj $nj --cmd $cmd exp/tri3/graph \ data/test exp/tri3/decode_test fi -if [ $stage -le 12 ]; then +if [ $stage -le 13 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ data/train_aug data/lang exp/tri3 exp/tri3_ali fi -if [ $stage -le 13 ]; then +if [ $stage -le 14 ]; then local/chain/run_cnn_1a.sh --train_set train_aug fi -if [ $stage -le 14 ]; then +if [ $stage -le 15 ]; then local/chain/run_cnn_chainali_1c.sh --train_set train_aug \ --chain-model-dir exp/chain/cnn_1a --stage 2 fi From 2645f146491a27ebbf2e127246f5d8ae4a0efc22 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 31 Aug 2018 02:19:00 -0400 Subject: [PATCH 04/37] fixing bug --- egs/iam/v2/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 33a78bf41d0..0881df1ff05 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -146,7 +146,7 @@ fi if [ $stage -le 11 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train data/lang exp/tri2 exp/tri2_ali + data/train_aug data/lang exp/tri2 exp/tri2_ali steps/train_sat.sh --cmd $cmd 500 20000 \ data/train_aug data/lang exp/tri2_ali exp/tri3 From 6ebfdb2213bf9ea76b7884dd31bc51f981f0ea56 Mon Sep 17 
00:00:00 2001 From: aarora8 Date: Fri, 31 Aug 2018 20:37:08 -0400 Subject: [PATCH 05/37] adding parameter tuning --- egs/iam/v2/local/chain/run_cnn_chainali_1c.sh | 7 +- egs/iam/v2/local/chain/run_cnn_chainali_1d.sh | 251 ++++++++++++++++++ egs/iam/v2/run.sh | 2 +- 3 files changed, 258 insertions(+), 2 deletions(-) create mode 100755 egs/iam/v2/local/chain/run_cnn_chainali_1d.sh diff --git a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh b/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh index a17d7307fb4..53039377baf 100755 --- a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh +++ b/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh @@ -44,6 +44,8 @@ tdnn_dim=450 srand=0 remove_egs=false lang_test=lang +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -230,7 +232,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -244,4 +246,7 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi diff --git a/egs/iam/v2/local/chain/run_cnn_chainali_1d.sh b/egs/iam/v2/local/chain/run_cnn_chainali_1d.sh new file mode 100755 index 00000000000..3123ee897d5 --- /dev/null +++ b/egs/iam/v2/local/chain/run_cnn_chainali_1d.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +# chainali_1c is as chainali_1b except it uses l2-regularize +# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b exp/chain/cnn_chainali_1c +# System cnn_chainali_1b cnn_chainali_1c +# WER 14.38 12.72 +# CER 7.14 5.99 +# Final train prob -0.0113 -0.0291 +# Final valid prob -0.0400 -0.0359 +# Final train prob (xent) -0.6043 -0.9781 +# Final valid prob (xent) -0.9030 -1.1544 +# Parameters 3.96M 3.96M + +# steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1c +# exp/chain/cnn_chainali_1c: num-iters=21 nj=2..4 num-params=4.0M dim=40->369 combine=-0.007->-0.007 (over 1) xent:train/valid[13,20,final]=(-1.44,-1.05,-0.997/-1.53,-1.19,-1.15) logprob:train/valid[13,20,final]=(-0.056,-0.020,-0.012/-0.056,-0.025,-0.020) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +ali=tri3_ali +chain_model_dir=exp/chain${nnet3_affix}/cnn_1a +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=550 +# training options +srand=0 +remove_egs=false +lang_test=lang +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $chain_model_dir $lat_dir + cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=true \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.frames-per-iter=1500000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 0881df1ff05..209fcb5de2f 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -169,6 +169,6 @@ if [ $stage -le 14 ]; then fi if [ $stage -le 15 ]; then - local/chain/run_cnn_chainali_1c.sh --train_set train_aug \ + local/chain/run_cnn_chainali_1d.sh --train_set train_aug \ --chain-model-dir exp/chain/cnn_1a --stage 2 fi From b5329781b0e049c653d92eed06a911e450ef79a1 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 1 Sep 2018 04:01:54 -0400 Subject: [PATCH 06/37] cosmetic fixes and updating results --- egs/iam/v2/local/chain/run_cnn.sh | 1 + egs/iam/v2/local/chain/run_cnn_chainali.sh | 1 + .../v2/local/chain/{ => tuning}/run_cnn_1a.sh | 0 .../run_cnn_chainali_1a.sh} | 24 +++++++------- .../run_cnn_chainali_1b.sh} | 32 +++++++++---------- egs/iam/v2/run.sh | 6 ++-- 6 files changed, 32 insertions(+), 32 deletions(-) create mode 120000 egs/iam/v2/local/chain/run_cnn.sh create mode 120000 egs/iam/v2/local/chain/run_cnn_chainali.sh rename egs/iam/v2/local/chain/{ => tuning}/run_cnn_1a.sh (100%) rename egs/iam/v2/local/chain/{run_cnn_chainali_1c.sh => tuning/run_cnn_chainali_1a.sh} (91%) rename egs/iam/v2/local/chain/{run_cnn_chainali_1d.sh => tuning/run_cnn_chainali_1b.sh} (90%) diff --git a/egs/iam/v2/local/chain/run_cnn.sh b/egs/iam/v2/local/chain/run_cnn.sh new file mode 120000 index 00000000000..df6f0a468c1 --- /dev/null +++ b/egs/iam/v2/local/chain/run_cnn.sh @@ -0,0 +1 @@ +tuning/run_cnn_1a.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/run_cnn_chainali.sh b/egs/iam/v2/local/chain/run_cnn_chainali.sh new file mode 120000 index 00000000000..86568421fe1 --- /dev/null +++ b/egs/iam/v2/local/chain/run_cnn_chainali.sh @@ -0,0 +1 @@ +tuning/run_cnn_chainali_1b.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/run_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh similarity index 100% rename from egs/iam/v2/local/chain/run_cnn_1a.sh rename to egs/iam/v2/local/chain/tuning/run_cnn_1a.sh diff --git a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh similarity index 91% rename from egs/iam/v2/local/chain/run_cnn_chainali_1c.sh rename to egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh index 53039377baf..aa4d65c0fde 100755 --- a/egs/iam/v2/local/chain/run_cnn_chainali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh @@ -1,18 +1,16 @@ #!/bin/bash -# chainali_1c is as chainali_1b except it uses l2-regularize -# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b exp/chain/cnn_chainali_1c -# System cnn_chainali_1b cnn_chainali_1c -# WER 14.38 12.72 -# CER 7.14 5.99 -# Final train prob -0.0113 -0.0291 -# Final valid prob -0.0400 -0.0359 -# Final train prob (xent) -0.6043 -0.9781 -# Final valid prob (xent) -0.9030 -1.1544 -# Parameters 3.96M 3.96M - -# steps/info/chain_dir_info.pl 
exp/chain/cnn_chainali_1c -# exp/chain/cnn_chainali_1c: num-iters=21 nj=2..4 num-params=4.0M dim=40->369 combine=-0.007->-0.007 (over 1) xent:train/valid[13,20,final]=(-1.44,-1.05,-0.997/-1.53,-1.19,-1.15) logprob:train/valid[13,20,final]=(-0.056,-0.020,-0.012/-0.056,-0.025,-0.020) +# ./local/chain/compare_wer.sh exp/chain/cnn_chainali_1a/ +# System cnn_chainali_1a +# WER 10.48 +# WER (rescored) 10.23 +# CER 4.82 +# CER (rescored) 4.69 +# Final train prob -0.0444 +# Final valid prob -0.0645 +# Final train prob (xent) -0.4523 +# Final valid prob (xent) -0.5350 +# Parameters 5.65M set -e -o pipefail diff --git a/egs/iam/v2/local/chain/run_cnn_chainali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh similarity index 90% rename from egs/iam/v2/local/chain/run_cnn_chainali_1d.sh rename to egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh index 3123ee897d5..c648f189dca 100755 --- a/egs/iam/v2/local/chain/run_cnn_chainali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh @@ -1,19 +1,19 @@ #!/bin/bash -# chainali_1c is as chainali_1b except it uses l2-regularize -# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b exp/chain/cnn_chainali_1c -# System cnn_chainali_1b cnn_chainali_1c -# WER 14.38 12.72 -# CER 7.14 5.99 -# Final train prob -0.0113 -0.0291 -# Final valid prob -0.0400 -0.0359 -# Final train prob (xent) -0.6043 -0.9781 -# Final valid prob (xent) -0.9030 -1.1544 -# Parameters 3.96M 3.96M - -# steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1c -# exp/chain/cnn_chainali_1c: num-iters=21 nj=2..4 num-params=4.0M dim=40->369 combine=-0.007->-0.007 (over 1) xent:train/valid[13,20,final]=(-1.44,-1.05,-0.997/-1.53,-1.19,-1.15) logprob:train/valid[13,20,final]=(-0.056,-0.020,-0.012/-0.056,-0.025,-0.020) - +# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b +# System cnn_chainali_1b +# WER 9.49 +# WER (rescored) 9.27 +# CER 4.39 +# CER (rescored) 4.32 +# Final train prob -0.0466 +# Final valid prob -0.0692 +# Final train prob (xent) -0.4811 +# Final valid prob (xent) -0.5538 +# Parameters 5.65M + +# steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1b +# exp/chain/cnn_chainali_1d: num-iters=40 nj=2..4 num-params=5.7M dim=40->400 combine=-0.051->-0.051 (over 1) xent:train/valid[25,39,final]=(-0.818,-0.500,-0.481/-0.828,-0.570,-0.554) logprob:train/valid[25,39,final]=(-0.097,-0.050,-0.047/-0.114,-0.073,-0.069) set -e -o pipefail stage=0 @@ -23,7 +23,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= @@ -196,7 +196,7 @@ if [ $stage -le 5 ]; then --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=6 \ + --trainer.num-epochs=5 \ --trainer.frames-per-iter=1500000 \ --trainer.optimization.num-jobs-initial=2 \ --trainer.optimization.num-jobs-final=4 \ diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 209fcb5de2f..319741d814d 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -165,10 +165,10 @@ if [ $stage -le 13 ]; then fi if [ $stage -le 14 ]; then - local/chain/run_cnn_1a.sh --train_set train_aug + local/chain/run_cnn.sh --train_set train_aug fi if [ $stage -le 15 ]; then - local/chain/run_cnn_chainali_1d.sh --train_set train_aug \ - --chain-model-dir exp/chain/cnn_1a --stage 2 + local/chain/run_cnn_chainali.sh --train_set train_aug \ + --chain-model-dir exp/chain/cnn_1a --stage 4 fi From f383334f221f69c03a607579c6fb0564de116032 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 1 Sep 2018 04:27:30 -0400 Subject: [PATCH 07/37] cosmetic fixes --- egs/iam/v2/local/augment_data.sh | 17 ++++++----- egs/iam/v2/local/chain/run_e2e_cnn.sh | 28 ++++++++--------- egs/iam/v2/local/chain/tuning/run_cnn_1a.sh | 20 +++++++------ .../local/chain/tuning/run_cnn_chainali_1a.sh | 2 +- .../local/chain/tuning/run_cnn_chainali_1b.sh | 2 +- egs/iam/v2/local/extract_features.sh | 2 +- egs/iam/v2/local/process_corpus.py | 30 ------------------- egs/wsj/s5/utils/copy_data_dir.sh | 10 ++----- 8 files changed, 39 insertions(+), 72 deletions(-) delete mode 100755 egs/iam/v2/local/process_corpus.py diff --git a/egs/iam/v2/local/augment_data.sh b/egs/iam/v2/local/augment_data.sh index 443a16874f2..82fa5230a43 100755 --- a/egs/iam/v2/local/augment_data.sh +++ b/egs/iam/v2/local/augment_data.sh @@ -19,14 +19,15 @@ outdir=$2 datadir=$3 mkdir -p $datadir/augmentations -echo "copying $srcdir to $datadir/augmentations/aug1" -utils/copy_data_dir.sh --spk-prefix aug1- --utt-prefix aug1- $srcdir $datadir/augmentations/aug1 - -echo " copying allowed length for training with augmented data..." -cat $srcdir/allowed_lengths.txt > $datadir/augmentations/aug1/allowed_lengths.txt - -echo " Extracting features, creating feats.scp file for augmentated data" -local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim --fliplr false --augment true $datadir/augmentations/aug1 +echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp" + +for set in aug1; do + image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ + $srcdir $datadir/augmentations/$set + cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ + --fliplr false --augment true $datadir/augmentations/$set +done echo " combine original data and data from different augmentations" utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1 diff --git a/egs/iam/v2/local/chain/run_e2e_cnn.sh b/egs/iam/v2/local/chain/run_e2e_cnn.sh index 15bdf610cd3..bd9f788d702 100755 --- a/egs/iam/v2/local/chain/run_e2e_cnn.sh +++ b/egs/iam/v2/local/chain/run_e2e_cnn.sh @@ -2,20 +2,20 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. 
from scratch) - -# local/chain/compare_wer.sh exp/chain/cnn_1a exp/chain/cnn_chainali_1c exp/chain/e2e_cnn_1a -# System cnn_1a cnn_chainali_1c e2e_cnn_1a -# WER 18.52 12.72 12.15 -# CER 10.07 5.99 6.03 -# Final train prob -0.0077 -0.0291 -0.0371 -# Final valid prob -0.0970 -0.0359 -0.0636 -# Final train prob (xent) -0.5484 -0.9781 -# Final valid prob (xent) -0.9643 -1.1544 -# Parameters 4.36M 3.96M 9.13M - -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1a: num-iters=21 nj=2..4 num-params=9.1M dim=40->12640 combine=-0.033->-0.033 (over 1) logprob:train/valid[13,20,final]=(-0.058,-0.042,-0.035/-0.070,-0.064,-0.059) - +# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1d/ +# System cnn_e2eali_1d +# WER 9.92 +# WER (rescored) 9.50 +# CER 4.53 +# CER (rescored) 4.46 +# Final train prob -0.0472 +# Final valid prob -0.0713 +# Final train prob (xent) -0.4751 +# Final valid prob (xent) -0.5506 +# Parameters 5.64M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d +# exp/chain/cnn_e2eali_1d/: num-iters=40 nj=2..4 num-params=5.6M dim=40->392 combine=-0.051->-0.051 (over 1) xent:train/valid[25,39,final]=(-0.764,-0.493,-0.475/-0.770,-0.566,-0.551) logprob:train/valid[25,39,final]=(-0.094,-0.051,-0.047/-0.111,-0.075,-0.071) set -e # configs for 'chain' diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh index 5e7d5bac77b..e39b14ac8dc 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh @@ -5,17 +5,19 @@ # 2017 Ashish Arora # steps/info/chain_dir_info.pl exp/chain/cnn_1a/ -# exp/chain/cnn_1a/: num-iters=21 nj=2..4 num-params=4.4M dim=40->364 combine=-0.021->-0.015 xent:train/valid[13,20,final]=(-1.05,-0.701,-0.591/-1.30,-1.08,-1.00) logprob:train/valid[13,20,final]=(-0.061,-0.034,-0.030/-0.107,-0.101,-0.098) +# exp/chain/cnn_1a/: num-iters=42 nj=2..4 num-params=4.4M dim=40->400 combine=-0.039->-0.039 (over 2) xent:train/valid[27,41,final]=(-0.502,-0.380,-0.376/-0.679,-0.626,-0.625) logprob:train/valid[27,41,final]=(-0.038,-0.032,-0.032/-0.063,-0.064,-0.064) -# local/chain/compare_wer.sh exp/chain/cnn_1a/ +# ./local/chain/compare_wer.sh exp/chain/cnn_1a/ # System cnn_1a -# WER 18.52 -# CER 10.07 -# Final train prob -0.0077 -# Final valid prob -0.0970 -# Final train prob (xent) -0.5484 -# Final valid prob (xent) -0.9643 -# Parameters 4.36M +# WER 14.91 +# WER (rescored) -- +# CER 7.92 +# CER (rescored) -- +# Final train prob -0.0320 +# Final valid prob -0.0643 +# Final train prob (xent) -0.3762 +# Final valid prob (xent) -0.6247 +# Parameters 4.39M set -e -o pipefail diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh index aa4d65c0fde..07bdac88468 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh @@ -21,7 +21,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh index c648f189dca..36a30b2df29 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh @@ -23,7 +23,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= diff --git a/egs/iam/v2/local/extract_features.sh b/egs/iam/v2/local/extract_features.sh index 1741ad3f9b2..4ed6ba04348 100755 --- a/egs/iam/v2/local/extract_features.sh +++ b/egs/iam/v2/local/extract_features.sh @@ -36,7 +36,7 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - local/make_features.py $logdir/images.JOB.scp \ + image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ diff --git a/egs/iam/v2/local/process_corpus.py b/egs/iam/v2/local/process_corpus.py deleted file mode 100755 index 9f8e1d275d3..00000000000 --- a/egs/iam/v2/local/process_corpus.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2018 Ashish Arora -# Apache 2.0 -# This script reads valid phones and removes the lines in the corpus -# which have any other phone. - -import os -import sys, io - -phone_file = os.path.join('data/local/phones.txt') -infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') -output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') -phone_dict = dict() -with open(phone_file, 'r', encoding='utf-8') as phone_fh: - for line in phone_fh: - line = line.strip().split()[0] - phone_dict[line] = line - -phone_dict[' '] = ' ' -corpus_text = list() -for line in infile: - text = line.strip() - skip_text = False - for phone in text: - if phone not in phone_dict.keys(): - skip_text = True - break - if not skip_text: - output.write(text+ '\n') - diff --git a/egs/wsj/s5/utils/copy_data_dir.sh b/egs/wsj/s5/utils/copy_data_dir.sh index 996cf76ec33..f3b885c5e79 100755 --- a/egs/wsj/s5/utils/copy_data_dir.sh +++ b/egs/wsj/s5/utils/copy_data_dir.sh @@ -86,16 +86,10 @@ fi if [ -f $srcdir/segments ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments - if [ -f $srcdir/wav.scp ]; then - cp $srcdir/wav.scp $destdir - elif [ -f $srcdir/images.scp ]; then - cp $srcdir/images.scp $destdir - fi + cp $srcdir/wav.scp $destdir else # no segments->wav indexed by utt. 
if [ -f $srcdir/wav.scp ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp - elif [ -f $srcdir/images.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp fi fi @@ -132,7 +126,7 @@ rm $destdir/spk_map $destdir/utt_map echo "$0: copied data from $srcdir to $destdir" -for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp images.scp reco2file_and_channel stm glm ctm; do +for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" echo " ... $destdir/.backup/$f" From 44c9e5866f9fb554fef31558a4eb1d28a1577be2 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 1 Sep 2018 04:36:27 -0400 Subject: [PATCH 08/37] adding results --- egs/iam/v2/local/chain/run_e2e_cnn.sh | 28 +++++++-------- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 34 ++++++++----------- egs/iam/v2/local/extract_features.sh | 2 +- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/egs/iam/v2/local/chain/run_e2e_cnn.sh b/egs/iam/v2/local/chain/run_e2e_cnn.sh index bd9f788d702..c1e9780876c 100755 --- a/egs/iam/v2/local/chain/run_e2e_cnn.sh +++ b/egs/iam/v2/local/chain/run_e2e_cnn.sh @@ -2,20 +2,20 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. from scratch) -# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1d/ -# System cnn_e2eali_1d -# WER 9.92 -# WER (rescored) 9.50 -# CER 4.53 -# CER (rescored) 4.46 -# Final train prob -0.0472 -# Final valid prob -0.0713 -# Final train prob (xent) -0.4751 -# Final valid prob (xent) -0.5506 -# Parameters 5.64M - -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d -# exp/chain/cnn_e2eali_1d/: num-iters=40 nj=2..4 num-params=5.6M dim=40->392 combine=-0.051->-0.051 (over 1) xent:train/valid[25,39,final]=(-0.764,-0.493,-0.475/-0.770,-0.566,-0.551) logprob:train/valid[25,39,final]=(-0.094,-0.051,-0.047/-0.111,-0.075,-0.071) +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# WER 11.24 +# WER (rescored) 10.80 +# CER 5.32 +# CER (rescored) 5.24 +# Final train prob 0.0568 +# Final valid prob 0.0381 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 9.13M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.1M dim=40->12640 combine=0.049->0.049 (over 1) logprob:train/valid[27,41,final]=(0.035,0.055,0.057/0.016,0.037,0.038) set -e # configs for 'chain' diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 6ab74dc2f0d..9cf5fbadcc8 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -1,24 +1,20 @@ #!/bin/bash -# e2eali_1c is the same as e2eali_1b but has fewer CNN layers, smaller -# l2-regularize, more epochs and uses dropout. 
- - -# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b exp/chain/cnn_e2eali_1c -# System cnn_e2eali_1b cnn_e2eali_1c -# WER 10.33 10.05 -# WER (rescored) 10.10 9.75 -# CER 5.00 4.76 -# CER (rescored) 4.88 4.68 -# Final train prob -0.0428 -0.0317 -# Final valid prob -0.0666 -0.0630 -# Final train prob (xent) -0.9210 -0.5413 -# Final valid prob (xent) -1.0264 -0.7096 -# Parameters 3.98M 5.12M - -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1c -# exp/chain/cnn_e2eali_1c: num-iters=21 nj=2..4 num-params=5.1M dim=40->392 combine=-0.034->-0.034 (over 1) xent:train/valid[13,20,final]=(-0.953,-0.800,-0.541/-1.03,-0.933,-0.710) logprob:train/valid[13,20,final]=(-0.069,-0.048,-0.032/-0.091,-0.078,-0.063) - +# This script does end2end chain training (i.e. from scratch) +# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1d/ +# System cnn_e2eali_1d +# WER 9.92 +# WER (rescored) 9.50 +# CER 4.53 +# CER (rescored) 4.46 +# Final train prob -0.0472 +# Final valid prob -0.0713 +# Final train prob (xent) -0.4751 +# Final valid prob (xent) -0.5506 +# Parameters 5.64M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d +# exp/chain/cnn_e2eali_1d/: num-iters=40 nj=2..4 num-params=5.6M dim=40->392 combine=-0.051->-0.051 (over 1) xent:train/valid[25,39,final]=(-0.764,-0.493,-0.475/-0.770,-0.566,-0.551) logprob:train/valid[25,39,final]=(-0.094,-0.051,-0.047/-0.111,-0.075,-0.071) set -e -o pipefail stage=0 diff --git a/egs/iam/v2/local/extract_features.sh b/egs/iam/v2/local/extract_features.sh index 4ed6ba04348..1741ad3f9b2 100755 --- a/egs/iam/v2/local/extract_features.sh +++ b/egs/iam/v2/local/extract_features.sh @@ -36,7 +36,7 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - image/ocr/make_features.py $logdir/images.JOB.scp \ + local/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ From 2d11672f50aa24d772159712351a2f0248c27861 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 1 Sep 2018 07:21:54 -0400 Subject: [PATCH 09/37] removing local/prepare_lang and adding gen_topo in run.sh --- egs/iam/v2/local/prepare_lang.sh | 474 ------------------------------- egs/iam/v2/run.sh | 3 + 2 files changed, 3 insertions(+), 474 deletions(-) delete mode 100755 egs/iam/v2/local/prepare_lang.sh diff --git a/egs/iam/v2/local/prepare_lang.sh b/egs/iam/v2/local/prepare_lang.sh deleted file mode 100755 index cc6bc03a432..00000000000 --- a/egs/iam/v2/local/prepare_lang.sh +++ /dev/null @@ -1,474 +0,0 @@ -#!/bin/bash -# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); -# Arnab Ghoshal -# 2014 Guoguo Chen -# 2015 Hainan Xu -# 2016 FAU Erlangen (Author: Axel Horndasch) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. 
- -# This script prepares a directory such as data/lang/, in the standard format, -# given a source directory containing a dictionary lexicon.txt in a form like: -# word phone1 phone2 ... phoneN -# per line (alternate prons would be separate lines), or a dictionary with probabilities -# called lexiconp.txt in a form: -# word pron-prob phone1 phone2 ... phoneN -# (with 0.0 < pron-prob <= 1.0); note: if lexiconp.txt exists, we use it even if -# lexicon.txt exists. -# and also files silence_phones.txt, nonsilence_phones.txt, optional_silence.txt -# and extra_questions.txt -# Here, silence_phones.txt and nonsilence_phones.txt are lists of silence and -# non-silence phones respectively (where silence includes various kinds of -# noise, laugh, cough, filled pauses etc., and nonsilence phones includes the -# "real" phones.) -# In each line of those files is a list of phones, and the phones on each line -# are assumed to correspond to the same "base phone", i.e. they will be -# different stress or tone variations of the same basic phone. -# The file "optional_silence.txt" contains just a single phone (typically SIL) -# which is used for optional silence in the lexicon. -# extra_questions.txt might be empty; typically will consist of lists of phones, -# all members of each list with the same stress or tone; and also possibly a -# list for the silence phones. This will augment the automatically generated -# questions (note: the automatically generated ones will treat all the -# stress/tone versions of a phone the same, so will not "get to ask" about -# stress or tone). -# - -# This script adds word-position-dependent phones and constructs a host of other -# derived files, that go in data/lang/. - -# Begin configuration section. -num_sil_states=5 -num_nonsil_states=3 -position_dependent_phones=true -# position_dependent_phones is false also when position dependent phones and word_boundary.txt -# have been generated by another source -share_silence_phones=false # if true, then share pdfs of different silence - # phones together. -sil_prob=0.5 -unk_fst= # if you want to model the unknown-word () - # with a phone-level LM as created by make_unk_lm.sh, - # provide the text-form FST via this flag, e.g. /unk_fst.txt - # where was the 2nd argument of make_unk_lm.sh. -phone_symbol_table= # if set, use a specified phones.txt file. -extra_word_disambig_syms= # if set, add disambiguation symbols from this file (one per line) - # to phones/disambig.txt, phones/wdisambig.txt and words.txt -num_extra_phone_disambig_syms=1 # Standard one phone disambiguation symbol is used for optional silence. - # Increasing this number does not harm, but is only useful if you later - # want to introduce this labels to L_disambig.fst -# end configuration sections - -echo "$0 $@" # Print the command line for logging - -. utils/parse_options.sh - -if [ $# -ne 4 ]; then - echo "usage: utils/prepare_lang.sh " - echo "e.g.: utils/prepare_lang.sh data/local/dict data/local/lang data/lang" - echo " should contain the following files:" - echo " extra_questions.txt lexicon.txt nonsilence_phones.txt optional_silence.txt silence_phones.txt" - echo "See http://kaldi-asr.org/doc/data_prep.html#data_prep_lang_creating for more info." - echo "options: " - echo " --num-sil-states # default: 5, #states in silence models." - echo " --num-nonsil-states # default: 3, #states in non-silence models." 
- echo " --position-dependent-phones (true|false) # default: true; if true, use _B, _E, _S & _I" - echo " # markers on phones to indicate word-internal positions. " - echo " --share-silence-phones (true|false) # default: false; if true, share pdfs of " - echo " # all non-silence phones. " - echo " --sil-prob # default: 0.5 [must have 0 <= silprob < 1]" - echo " --phone-symbol-table # default: \"\"; if not empty, use the provided " - echo " # phones.txt as phone symbol table. This is useful " - echo " # if you use a new dictionary for the existing setup." - echo " --unk-fst # default: none. e.g. exp/make_unk_lm/unk_fst.txt." - echo " # This is for if you want to model the unknown word" - echo " # via a phone-level LM rather than a special phone" - echo " # (this should be more useful for test-time than train-time)." - echo " --extra-word-disambig-syms # default: \"\"; if not empty, add disambiguation symbols" - echo " # from this file (one per line) to phones/disambig.txt," - echo " # phones/wdisambig.txt and words.txt" - exit 1; -fi - -srcdir=$1 -oov_word=$2 -tmpdir=$3 -dir=$4 -mkdir -p $dir $tmpdir $dir/phones - -silprob=false -[ -f $srcdir/lexiconp_silprob.txt ] && silprob=true - -[ -f path.sh ] && . ./path.sh - -! utils/validate_dict_dir.pl $srcdir && \ - echo "*Error validating directory $srcdir*" && exit 1; - -if [[ ! -f $srcdir/lexicon.txt ]]; then - echo "**Creating $dir/lexicon.txt from $dir/lexiconp.txt" - perl -ape 's/(\S+\s+)\S+\s+(.+)/$1$2/;' < $srcdir/lexiconp.txt > $srcdir/lexicon.txt || exit 1; -fi -if [[ ! -f $srcdir/lexiconp.txt ]]; then - echo "**Creating $srcdir/lexiconp.txt from $srcdir/lexicon.txt" - perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $srcdir/lexiconp.txt || exit 1; -fi - -if [ ! -z "$unk_fst" ] && [ ! -f "$unk_fst" ]; then - echo "$0: expected --unk-fst $unk_fst to exist as a file" - exit 1 -fi - -if ! utils/validate_dict_dir.pl $srcdir >&/dev/null; then - utils/validate_dict_dir.pl $srcdir # show the output. - echo "Validation failed (second time)" - exit 1; -fi - -# phones.txt file provided, we will do some sanity check here. -if [[ ! -z $phone_symbol_table ]]; then - # Checks if we have position dependent phones - n1=`cat $phone_symbol_table | grep -v -E "^#[0-9]+$" | cut -d' ' -f1 | sort -u | wc -l` - n2=`cat $phone_symbol_table | grep -v -E "^#[0-9]+$" | cut -d' ' -f1 | sed 's/_[BIES]$//g' | sort -u | wc -l` - $position_dependent_phones && [ $n1 -eq $n2 ] &&\ - echo "$0: Position dependent phones requested, but not in provided phone symbols" && exit 1; - ! $position_dependent_phones && [ $n1 -ne $n2 ] &&\ - echo "$0: Position dependent phones not requested, but appear in the provided phones.txt" && exit 1; - - # Checks if the phone sets match. - cat $srcdir/{,non}silence_phones.txt | awk -v f=$phone_symbol_table ' - BEGIN { while ((getline < f) > 0) { sub(/_[BEIS]$/, "", $1); phones[$1] = 1; }} - { for (x = 1; x <= NF; ++x) { if (!($x in phones)) { - print "Phone appears in the lexicon but not in the provided phones.txt: "$x; exit 1; }}}' || exit 1; -fi - -# In case there are extra word-level disambiguation symbols we need -# to make sure that all symbols in the provided file are valid. -if [ ! -z "$extra_word_disambig_syms" ]; then - if ! utils/lang/validate_disambig_sym_file.pl --allow-numeric "false" $extra_word_disambig_syms; then - echo "$0: Validation of disambiguation file \"$extra_word_disambig_syms\" failed." 
- exit 1; - fi -fi - -if $position_dependent_phones; then - # Create $tmpdir/lexiconp.txt from $srcdir/lexiconp.txt (or - # $tmpdir/lexiconp_silprob.txt from $srcdir/lexiconp_silprob.txt) by - # adding the markers _B, _E, _S, _I depending on word position. - # In this recipe, these markers apply to silence also. - # Do this starting from lexiconp.txt only. - if "$silprob"; then - perl -ane '@A=split(" ",$_); $w = shift @A; $p = shift @A; $silword_p = shift @A; - $wordsil_f = shift @A; $wordnonsil_f = shift @A; @A>0||die; - if(@A==1) { print "$w $p $silword_p $wordsil_f $wordnonsil_f $A[0]_S\n"; } - else { print "$w $p $silword_p $wordsil_f $wordnonsil_f $A[0]_B "; - for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \ - < $srcdir/lexiconp_silprob.txt > $tmpdir/lexiconp_silprob.txt - else - perl -ane '@A=split(" ",$_); $w = shift @A; $p = shift @A; @A>0||die; - if(@A==1) { print "$w $p $A[0]_S\n"; } else { print "$w $p $A[0]_B "; - for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \ - < $srcdir/lexiconp.txt > $tmpdir/lexiconp.txt || exit 1; - fi - - # create $tmpdir/phone_map.txt - # this has the format (on each line) - # ... - # where the versions depend on the position of the phone within a word. - # For instance, we'd have: - # AA AA_B AA_E AA_I AA_S - # for (B)egin, (E)nd, (I)nternal and (S)ingleton - # and in the case of silence - # SIL SIL SIL_B SIL_E SIL_I SIL_S - # [because SIL on its own is one of the variants; this is for when it doesn't - # occur inside a word but as an option in the lexicon.] - - # This phone map expands the phone lists into all the word-position-dependent - # versions of the phone lists. - cat <(set -f; for x in `cat $srcdir/silence_phones.txt`; do for y in "" "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \ - <(set -f; for x in `cat $srcdir/nonsilence_phones.txt`; do for y in "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \ - > $tmpdir/phone_map.txt -else - if "$silprob"; then - cp $srcdir/lexiconp_silprob.txt $tmpdir/lexiconp_silprob.txt - else - cp $srcdir/lexiconp.txt $tmpdir/lexiconp.txt - fi - cat $srcdir/silence_phones.txt $srcdir/nonsilence_phones.txt | \ - awk '{for(n=1;n<=NF;n++) print $n; }' > $tmpdir/phones - paste -d' ' $tmpdir/phones $tmpdir/phones > $tmpdir/phone_map.txt -fi -mkdir -p $dir/phones # various sets of phones... -# Sets of phones for use in clustering, and making monophone systems. -if $share_silence_phones; then - # build a roots file that will force all the silence phones to share the - # same pdf's. [three distinct states, only the transitions will differ.] - # 'shared'/'not-shared' means, do we share the 3 states of the HMM - # in the same tree-root? - # Sharing across models(phones) is achieved by writing several phones - # into one line of roots.txt (shared/not-shared doesn't affect this). - # 'not-shared not-split' means we have separate tree roots for the 3 states, - # but we never split the tree so they remain stumps, - # so all phones in the line correspond to the same model. - cat $srcdir/silence_phones.txt | awk '{printf("%s ", $0); } END{printf("\n");}' | cat - $srcdir/nonsilence_phones.txt | \ - utils/apply_map.pl $tmpdir/phone_map.txt > $dir/phones/sets.txt - cat $dir/phones/sets.txt | \ - awk '{if(NR==1) print "not-shared", "not-split", $0; else print "shared", "split", $0;}' > $dir/phones/roots.txt -else - # different silence phones will have different GMMs. 
[note: here, all "shared split" means - # is that we may have one GMM for all the states, or we can split on states. because they're - # context-independent phones, they don't see the context.] - cat $srcdir/{,non}silence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt > $dir/phones/sets.txt - cat $dir/phones/sets.txt | awk '{print "shared", "split", $0;}' > $dir/phones/roots.txt -fi -cat $srcdir/silence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \ - awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/silence.txt -cat $srcdir/nonsilence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \ - awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/nonsilence.txt -cp $srcdir/optional_silence.txt $dir/phones/optional_silence.txt -cp $dir/phones/silence.txt $dir/phones/context_indep.txt -# if extra_questions.txt is empty, it's OK. -cat $srcdir/extra_questions.txt 2>/dev/null | utils/apply_map.pl $tmpdir/phone_map.txt \ - >$dir/phones/extra_questions.txt -# Want extra questions about the word-start/word-end stuff. Make it separate for -# silence and non-silence. Probably doesn't matter, as silence will rarely -# be inside a word. -if $position_dependent_phones; then - for suffix in _B _E _I _S; do - (set -f; for x in `cat $srcdir/nonsilence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt - done - for suffix in "" _B _E _I _S; do - (set -f; for x in `cat $srcdir/silence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt - done -fi -# add_lex_disambig.pl is responsible for adding disambiguation symbols to -# the lexicon, for telling us how many disambiguation symbols it used, -# and and also for modifying the unknown-word's pronunciation (if the -# --unk-fst was provided) to the sequence "#1 #2 #3", and reserving those -# disambig symbols for that purpose. -# The #2 will later be replaced with the actual unk model. The reason -# for the #1 and the #3 is for disambiguation and also to keep the -# FST compact. If we didn't have the #1, we might have a different copy of -# the unk-model FST, or at least some of its arcs, for each start-state from -# which an transition comes (instead of per end-state, which is more compact); -# and adding the #3 prevents us from potentially having 2 copies of the unk-model -# FST due to the optional-silence [the last phone of any word gets 2 arcs]. -if [ ! -z "$unk_fst" ]; then # if the --unk-fst option was provided... - if "$silprob"; then - utils/lang/internal/modify_unk_pron.py $tmpdir/lexiconp_silprob.txt "$oov_word" || exit 1 - else - utils/lang/internal/modify_unk_pron.py $tmpdir/lexiconp.txt "$oov_word" || exit 1 - fi - unk_opt="--first-allowed-disambig 4" -else - unk_opt= -fi -if "$silprob"; then - ndisambig=$(utils/add_lex_disambig.pl $unk_opt --pron-probs --sil-probs $tmpdir/lexiconp_silprob.txt $tmpdir/lexiconp_silprob_disambig.txt) -else - ndisambig=$(utils/add_lex_disambig.pl $unk_opt --pron-probs $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt) -fi -ndisambig=$[$ndisambig+$num_extra_phone_disambig_syms]; # add (at least) one disambig symbol for silence in lexicon FST. 
-echo $ndisambig > $tmpdir/lex_ndisambig -# Format of lexiconp_disambig.txt: -# !SIL 1.0 SIL_S -# 1.0 SPN_S #1 -# 1.0 SPN_S #2 -# 1.0 NSN_S -# !EXCLAMATION-POINT 1.0 EH2_B K_I S_I K_I L_I AH0_I M_I EY1_I SH_I AH0_I N_I P_I OY2_I N_I T_E -( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) >$dir/phones/disambig.txt -# In case there are extra word-level disambiguation symbols they also -# need to be added to the list of phone-level disambiguation symbols. -if [ ! -z "$extra_word_disambig_syms" ]; then - # We expect a file containing valid word-level disambiguation symbols. - cat $extra_word_disambig_syms | awk '{ print $1 }' >> $dir/phones/disambig.txt -fi -# Create phone symbol table. -if [[ ! -z $phone_symbol_table ]]; then - start_symbol=`grep \#0 $phone_symbol_table | awk '{print $2}'` - echo "" | cat - $dir/phones/{silence,nonsilence}.txt | awk -v f=$phone_symbol_table ' - BEGIN { while ((getline < f) > 0) { phones[$1] = $2; }} { print $1" "phones[$1]; }' | sort -k2 -g |\ - cat - <(cat $dir/phones/disambig.txt | awk -v x=$start_symbol '{n=x+NR-1; print $1, n;}') > $dir/phones.txt -else - echo "" | cat - $dir/phones/{silence,nonsilence,disambig}.txt | \ - awk '{n=NR-1; print $1, n;}' > $dir/phones.txt -fi -# Create a file that describes the word-boundary information for -# each phone. 5 categories. -if $position_dependent_phones; then - cat $dir/phones/{silence,nonsilence}.txt | \ - awk '/_I$/{print $1, "internal"; next;} /_B$/{print $1, "begin"; next; } - /_S$/{print $1, "singleton"; next;} /_E$/{print $1, "end"; next; } - {print $1, "nonword";} ' > $dir/phones/word_boundary.txt -else - # word_boundary.txt might have been generated by another source - [ -f $srcdir/word_boundary.txt ] && cp $srcdir/word_boundary.txt $dir/phones/word_boundary.txt -fi -# Create word symbol table. -# and are only needed due to the need to rescore lattices with -# ConstArpaLm format language model. They do not normally appear in G.fst or -# L.fst. -if "$silprob"; then - # remove the silprob - cat $tmpdir/lexiconp_silprob.txt |\ - awk '{ - for(i=1; i<=NF; i++) { - if(i!=3 && i!=4 && i!=5) printf("%s\t", $i); if(i==NF) print ""; - } - }' > $tmpdir/lexiconp.txt -fi -cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk ' - BEGIN { - print " 0"; - } - { - if ($1 == "") { - print " is in the vocabulary!" | "cat 1>&2" - exit 1; - } - if ($1 == "") { - print " is in the vocabulary!" | "cat 1>&2" - exit 1; - } - printf("%s %d\n", $1, NR); - } - END { - printf("#0 %d\n", NR+1); - printf(" %d\n", NR+2); - printf(" %d\n", NR+3); - }' > $dir/words.txt || exit 1; -# In case there are extra word-level disambiguation symbols they also -# need to be added to words.txt -if [ ! -z "$extra_word_disambig_syms" ]; then - # Since words.txt already exists, we need to extract the current word count. - word_count=`tail -n 1 $dir/words.txt | awk '{ print $2 }'` - # We expect a file containing valid word-level disambiguation symbols. - # The list of symbols is attached to the current words.txt (including - # a numeric identifier for each symbol). - cat $extra_word_disambig_syms | \ - awk -v WC=$word_count '{ printf("%s %d\n", $1, ++WC); }' >> $dir/words.txt || exit 1; -fi -# format of $dir/words.txt: -# 0 -#!EXCLAMATION-POINT 1 -#!SIL 2 -#"CLOSE-QUOTE 3 -#... -silphone=`cat $srcdir/optional_silence.txt` || exit 1; -[ -z "$silphone" ] && \ - ( echo "You have no optional-silence phone; it is required in the current scripts" - echo "but you may use the option --sil-prob 0.0 to stop it being used." 
) && \ - exit 1; -# create $dir/phones/align_lexicon.{txt,int}. -# This is the method we use for lattice word alignment if we are not -# using word-position-dependent phones. -# First remove pron-probs from the lexicon. -perl -ape 's/(\S+\s+)\S+\s+(.+)/$1$2/;' <$tmpdir/lexiconp.txt >$tmpdir/align_lexicon.txt -# Note: here, $silphone will have no suffix e.g. _S because it occurs as optional-silence, -# and is not part of a word. -[ ! -z "$silphone" ] && echo " $silphone" >> $tmpdir/align_lexicon.txt -cat $tmpdir/align_lexicon.txt | \ - perl -ane '@A = split; print $A[0], " ", join(" ", @A), "\n";' | sort | uniq > $dir/phones/align_lexicon.txt -# create phones/align_lexicon.int -cat $dir/phones/align_lexicon.txt | utils/sym2int.pl -f 3- $dir/phones.txt | \ - utils/sym2int.pl -f 1-2 $dir/words.txt > $dir/phones/align_lexicon.int -# Create the basic L.fst without disambiguation symbols, for use -# in training. -if $silprob; then - # Add silence probabilities (modlels the prob. of silence before and after each - # word). On some setups this helps a bit. See utils/dict_dir_add_pronprobs.sh - # and where it's called in the example scripts (run.sh). - utils/make_lexicon_fst_silprob.pl $tmpdir/lexiconp_silprob.txt $srcdir/silprob.txt $silphone "" | \ - fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; -else - utils/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp.txt $sil_prob $silphone | \ - fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstarcsort --sort_type=olabel > $dir/L.fst || exit 1; -fi -# The file oov.txt contains a word that we will map any OOVs to during -# training. -echo "$oov_word" > $dir/oov.txt || exit 1; -cat $dir/oov.txt | utils/sym2int.pl $dir/words.txt >$dir/oov.int || exit 1; -# integer version of oov symbol, used in some scripts. -# the file wdisambig.txt contains a (line-by-line) list of the text-form of the -# disambiguation symbols that are used in the grammar and passed through by the -# lexicon. At this stage it's hardcoded as '#0', but we're laying the groundwork -# for more generality (which probably would be added by another script). -# wdisambig_words.int contains the corresponding list interpreted by the -# symbol table words.txt, and wdisambig_phones.int contains the corresponding -# list interpreted by the symbol table phones.txt. -echo '#0' >$dir/phones/wdisambig.txt -# In case there are extra word-level disambiguation symbols they need -# to be added to the existing word-level disambiguation symbols file. -if [ ! -z "$extra_word_disambig_syms" ]; then - # We expect a file containing valid word-level disambiguation symbols. - # The regular expression for awk is just a paranoia filter (e.g. for empty lines). - cat $extra_word_disambig_syms | awk '{ print $1 }' >> $dir/phones/wdisambig.txt -fi -utils/sym2int.pl $dir/phones.txt <$dir/phones/wdisambig.txt >$dir/phones/wdisambig_phones.int -utils/sym2int.pl $dir/words.txt <$dir/phones/wdisambig.txt >$dir/phones/wdisambig_words.int -# Create these lists of phones in colon-separated integer list form too, -# for purposes of being given to programs as command-line options. 
-for f in silence nonsilence optional_silence disambig context_indep; do - utils/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt >$dir/phones/$f.int - utils/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt | \ - awk '{printf(":%d", $1);} END{printf "\n"}' | sed s/:// > $dir/phones/$f.csl || exit 1; -done -for x in sets extra_questions; do - utils/sym2int.pl $dir/phones.txt <$dir/phones/$x.txt > $dir/phones/$x.int || exit 1; -done -utils/sym2int.pl -f 3- $dir/phones.txt <$dir/phones/roots.txt \ - > $dir/phones/roots.int || exit 1; -if [ -f $dir/phones/word_boundary.txt ]; then - utils/sym2int.pl -f 1 $dir/phones.txt <$dir/phones/word_boundary.txt \ - > $dir/phones/word_boundary.int || exit 1; -fi -silphonelist=`cat $dir/phones/silence.csl` -nonsilphonelist=`cat $dir/phones/nonsilence.csl` -# Note: it's OK, after generating the 'lang' directory, to overwrite the topo file -# with another one of your choice if the 'topo' file you want can't be generated by -# utils/gen_topo.pl. We do this in the 'chain' recipes. Of course, the 'topo' file -# should cover all the phones. Try running utils/validate_lang.pl to check that -# everything is OK after modifying the topo file. -local/gen_topo.py $num_nonsil_states $num_sil_states 4 $nonsilphonelist $silphonelist $dir/phones.txt >$dir/topo -# Create the lexicon FST with disambiguation symbols, and put it in lang_test. -# There is an extra step where we create a loop to "pass through" the -# disambiguation symbols from G.fst. -if $silprob; then - utils/make_lexicon_fst_silprob.pl $tmpdir/lexiconp_silprob_disambig.txt $srcdir/silprob.txt $silphone '#'$ndisambig | \ - fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops $dir/phones/wdisambig_phones.int $dir/phones/wdisambig_words.int | \ - fstarcsort --sort_type=olabel > $dir/L_disambig.fst || exit 1; -else - utils/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt $sil_prob $silphone '#'$ndisambig | \ - fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \ - --keep_isymbols=false --keep_osymbols=false | \ - fstaddselfloops $dir/phones/wdisambig_phones.int $dir/phones/wdisambig_words.int | \ - fstarcsort --sort_type=olabel > $dir/L_disambig.fst || exit 1; -fi -if [ ! -z "$unk_fst" ]; then - utils/lang/internal/apply_unk_lm.sh $unk_fst $dir || exit 1 - if ! $position_dependent_phones; then - echo "$0: warning: you are using the --unk-lm option and setting --position-dependent-phones false." - echo " ... this will make it impossible to properly work out the word boundaries after" - echo " ... decoding; quite a few scripts will not work as a result, and many scoring scripts" - echo " ... will die." - sleep 4 - fi -fi -echo "$(basename $0): validating output directory" -! 
utils/validate_lang.pl $dir && echo "$(basename $0): error validating output" && exit 1; -exit 0; diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 319741d814d..92061121f6c 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -93,6 +93,9 @@ if [ $stage -le 4 ]; then local/prepare_dict.sh utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang + silphonelist=`cat data/lang/phones/silence.csl` + nonsilphonelist=`cat data/lang/phones/nonsilence.csl` + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ From 4fc67057878a235d24c51a1939b979fa76595896 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 1 Sep 2018 10:16:21 -0400 Subject: [PATCH 10/37] fixing bugs --- egs/cifar/v1/image/copy_data_dir.sh | 118 ++++++++++++++++++++++++++++ egs/iam/v2/local/gen_topo.py | 5 +- 2 files changed, 120 insertions(+), 3 deletions(-) create mode 100755 egs/cifar/v1/image/copy_data_dir.sh diff --git a/egs/cifar/v1/image/copy_data_dir.sh b/egs/cifar/v1/image/copy_data_dir.sh new file mode 100755 index 00000000000..c923f5cc07a --- /dev/null +++ b/egs/cifar/v1/image/copy_data_dir.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# Copyright 2013 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script operates on a directory, such as in data/train/, +# that contains some subset of the following files: +# feats.scp +# images.scp +# vad.scp +# spk2utt +# utt2spk +# text +# +# It copies to another directory, possibly adding a specified prefix or a suffix +# to the utterance and/or speaker names. Note, the recording-ids stay the same. +# + + +# begin configuration section +spk_prefix= +utt_prefix= +spk_suffix= +utt_suffix= +validate_opts= # should rarely be needed. +# end configuration section + +. utils/parse_options.sh + +if [ $# != 2 ]; then + echo "Usage: " + echo " $0 [options] " + echo "e.g.:" + echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" + echo "Options" + echo " --spk-prefix= # Prefix for speaker ids, default empty" + echo " --utt-prefix= # Prefix for utterance ids, default empty" + echo " --spk-suffix= # Suffix for speaker ids, default empty" + echo " --utt-suffix= # Suffix for utterance ids, default empty" + exit 1; +fi + + +export LC_ALL=C + +srcdir=$1 +destdir=$2 + +if [ ! -f $srcdir/utt2spk ]; then + echo "copy_data_dir.sh: no such file $srcdir/utt2spk" + exit 1; +fi + +if [ "$destdir" == "$srcdir" ]; then + echo "$0: this script requires and to be different." + exit 1 +fi + +set -e; + +mkdir -p $destdir + +cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map +cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map + +if [ ! -f $srcdir/utt2uniq ]; then + if [[ ! -z $utt_prefix || ! 
-z $utt_suffix ]]; then + cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq + fi +else + cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq +fi + +cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ + utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk + +utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt + +if [ -f $srcdir/feats.scp ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp +fi + +if [ -f $srcdir/vad.scp ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp +fi + +if [ -f $srcdir/images.scp ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp +fi + +if [ -f $srcdir/text ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text +fi +if [ -f $srcdir/utt2dur ]; then + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur +fi +if [ -f $srcdir/cmvn.scp ]; then + utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp +fi + +rm $destdir/spk_map $destdir/utt_map + +echo "$0: copied data from $srcdir to $destdir" + +for f in feats.scp cmvn.scp vad.scp utt2uniq utt2dur utt2num_frames text images.scp; do + if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then + echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" + echo " ... $destdir/.backup/$f" + mkdir -p $destdir/.backup + mv $destdir/$f $destdir/.backup/ + fi +done + + +[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" +[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" + +utils/validate_data_dir.sh $validate_opts $destdir diff --git a/egs/iam/v2/local/gen_topo.py b/egs/iam/v2/local/gen_topo.py index a74c6d4bbae..540bfbcf270 100755 --- a/egs/iam/v2/local/gen_topo.py +++ b/egs/iam/v2/local/gen_topo.py @@ -32,14 +32,13 @@ all_phones = silence_phones + nonsilence_phones punctuation_phones = [] -exclude = set(string.punctuation) +exclude = set("!(),.?;:'-\"") with open(args.phone_list) as f: for line in f: line = line.strip() - phone = line.split('_')[0] + phone = line.split(' ')[0] if len(phone) == 1 and phone in exclude: punctuation_phones.append(int(line.split(' ')[1])) - # For nonsilence phones that are not punctuations print("") print("") From 8877530fa3e54e100427e2d411fda5fed9e75ac7 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sun, 2 Sep 2018 04:29:26 -0400 Subject: [PATCH 11/37] updating result --- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 24 +++++++++---------- egs/iam/v2/run_end2end.sh | 11 +-------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 9cf5fbadcc8..27988beafdd 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -3,18 +3,18 @@ # This script does end2end chain training (i.e. 
from scratch) # ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1d/ # System cnn_e2eali_1d -# WER 9.92 -# WER (rescored) 9.50 -# CER 4.53 -# CER (rescored) 4.46 -# Final train prob -0.0472 -# Final valid prob -0.0713 -# Final train prob (xent) -0.4751 -# Final valid prob (xent) -0.5506 -# Parameters 5.64M +# WER 9.52 +# WER (rescored) 9.29 +# CER 4.45 +# CER (rescored) 4.43 +# Final train prob -0.0473 +# Final valid prob -0.0706 +# Final train prob (xent) -0.4623 +# Final valid prob (xent) -0.5371 +# Parameters 5.08M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d -# exp/chain/cnn_e2eali_1d/: num-iters=40 nj=2..4 num-params=5.6M dim=40->392 combine=-0.051->-0.051 (over 1) xent:train/valid[25,39,final]=(-0.764,-0.493,-0.475/-0.770,-0.566,-0.551) logprob:train/valid[25,39,final]=(-0.094,-0.051,-0.047/-0.111,-0.075,-0.071) +# exp/chain/cnn_e2eali_1d/: num-iters=40 nj=2..4 num-params=5.1M dim=40->400 combine=-0.052->-0.052 (over 1) xent:train/valid[25,39,final]=(-0.739,-0.483,-0.462/-0.763,-0.551,-0.537) logprob:train/valid[25,39,final]=(-0.092,-0.052,-0.047/-0.112,-0.076,-0.071) set -e -o pipefail stage=0 @@ -140,7 +140,7 @@ if [ $stage -le 4 ]; then output_opts="l2-regularize=0.04" common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input @@ -194,7 +194,7 @@ if [ $stage -le 5 ]; then --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=8 \ + --trainer.num-epochs=5 \ --trainer.frames-per-iter=1500000 \ --trainer.optimization.num-jobs-initial=2 \ --trainer.optimization.num-jobs-final=4 \ diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index bd78c011b75..cf0d8476e55 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -6,15 +6,7 @@ stage=0 nj=20 username= password= -# iam_database points to the database path on the JHU grid. If you have not -# already downloaded the database you can set it to a local directory -# like "data/download" and follow the instructions -# in "local/prepare_data.sh" to download the database: iam_database=/export/corpora5/handwriting_ocr/IAM -# wellington_database points to the database path on the JHU grid. The Wellington -# corpus contains two directories WWC and WSC (Wellington Written and Spoken Corpus). -# This corpus is of written NZ English that can be purchased here: -# "https://www.victoria.ac.nz/lals/resources/corpora-default" wellington_database=/export/corpora5/Wellington/WWC/ . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. @@ -114,9 +106,8 @@ fi if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." 
steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ - --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train + data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi if [ $stage -le 7 ]; then From 59e2c8b19be4013e8d79680748328f36e8ef13a8 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sun, 2 Sep 2018 04:44:18 -0400 Subject: [PATCH 12/37] updating documentation, results and parameter tuning --- egs/iam/v2/local/chain/run_e2e_cnn.sh | 175 +----------------- .../v2/local/chain/tuning/run_e2e_cnn_1a.sh | 174 +++++++++++++++++ .../v2/local/chain/tuning/run_e2e_cnn_1b.sh | 160 ++++++++++++++++ egs/iam/v2/run.sh | 17 +- egs/iam/v2/run_end2end.sh | 16 +- 5 files changed, 365 insertions(+), 177 deletions(-) mode change 100755 => 120000 egs/iam/v2/local/chain/run_e2e_cnn.sh create mode 100755 egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh create mode 100755 egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh diff --git a/egs/iam/v2/local/chain/run_e2e_cnn.sh b/egs/iam/v2/local/chain/run_e2e_cnn.sh deleted file mode 100755 index c1e9780876c..00000000000 --- a/egs/iam/v2/local/chain/run_e2e_cnn.sh +++ /dev/null @@ -1,174 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian - -# This script does end2end chain training (i.e. from scratch) -# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ -# System e2e_cnn_1a -# WER 11.24 -# WER (rescored) 10.80 -# CER 5.32 -# CER (rescored) 5.24 -# Final train prob 0.0568 -# Final valid prob 0.0381 -# Final train prob (xent) -# Final valid prob (xent) -# Parameters 9.13M - -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.1M dim=40->12640 combine=0.049->0.049 (over 1) logprob:train/valid[27,41,final]=(0.035,0.055,0.057/0.016,0.037,0.038) -set -e - -# configs for 'chain' -stage=0 -train_stage=-10 -get_egs_stage=-10 -affix=1a - -# training options -tdnn_dim=450 -num_epochs=4 -num_jobs_initial=2 -num_jobs_final=4 -minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 -common_egs_dir= -l2_regularize=0.00005 -frames_per_iter=1000000 -cmvn_opts="--norm-means=true --norm-vars=true" -train_set=train -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! 
cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 1 ]; then - steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ - --shared-phones true \ - --type biphone \ - data/$train_set $lang $treedir - $cmd $treedir/log/make_phone_lm.log \ - cat data/$train_set/text \| \ - steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ - utils/sym2int.pl -f 2- data/lang/phones.txt \| \ - chain-est-phone-lm --num-extra-lm-states=500 \ - ark:- $treedir/phone_lm.fst -fi - -if [ $stage -le 2 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - - cnn_opts="l2-regularize=0.075" - tdnn_opts="l2-regularize=0.075" - output_opts="l2-regularize=0.1" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs -fi - -if [ $stage -le 3 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. 
- - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ - --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ - --chain.apply-deriv-weights false \ - --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ - --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 4 \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.shrink-value 1.0 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 30 --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/run_e2e_cnn.sh b/egs/iam/v2/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..7dca9c30e23 --- /dev/null +++ b/egs/iam/v2/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1b.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh new file mode 100755 index 00000000000..d88e1a38820 --- /dev/null +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. 
from scratch) +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# WER 11.24 +# WER (rescored) 10.80 +# CER 5.32 +# CER (rescored) 5.24 +# Final train prob 0.0568 +# Final valid prob 0.0381 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 9.13M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.1M dim=40->12640 combine=0.049->0.049 (over 1) logprob:train/valid[27,41,final]=(0.035,0.055,0.057/0.016,0.037,0.038) + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=450 +num_epochs=4 +num_jobs_initial=2 +num_jobs_final=4 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +common_egs_dir= +l2_regularize=0.00005 +frames_per_iter=1000000 +cmvn_opts="--norm-means=true --norm-vars=true" +train_set=train +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + --shared-phones true \ + --type biphone \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need 
to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. + + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize $l2_regularize \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter $frames_per_iter \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh new file mode 100755 index 00000000000..a3b0d8c582f --- /dev/null +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -0,0 +1,160 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. from scratch) +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# WER 11.24 +# WER (rescored) 10.80 +# CER 5.32 +# CER (rescored) 5.24 +# Final train prob 0.0568 +# Final valid prob 0.0381 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 9.13M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.1M dim=40->12640 combine=0.049->0.049 (over 1) logprob:train/valid[27,41,final]=(0.035,0.055,0.057/0.016,0.037,0.038) +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=450 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +common_egs_dir= +train_set=train +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + --shared-phones true \ + --type biphone \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. + + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1000000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
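  # [Editor's note, not part of the original patch] As a concrete, hypothetical
  # illustration of the comment above: a lang directory built from a different
  # ARPA LM with utils/format_lm.sh, say data/lang_test_small, could be handed
  # to mkgraph.sh in place of $lang_decode, e.g.
  #   utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_small $dir $dir/graph_small
  # provided its phones.txt matches the phones.txt the model was trained with.
  # (data/lang_test_small and graph_small are made-up names for this example.)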
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 92061121f6c..dcdbb92ed68 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -10,7 +10,15 @@ nj=70 decode_gmm=false username= password= +# iam_database points to the database path on the JHU grid. If you have not +# already downloaded the database you can set it to a local directory +# like "data/download" and follow the instructions +# in "local/prepare_data.sh" to download the database: iam_database=/export/corpora5/handwriting_ocr/IAM +# wellington_database points to the database path on the JHU grid. The Wellington +# corpus contains two directories WWC and WSC (Wellington Written and Spoken Corpus). +# This corpus is of written NZ English that can be purchased here: +# "https://www.victoria.ac.nz/lals/resources/corpora-default" wellington_database=/export/corpora5/Wellington/WWC/ . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. @@ -31,7 +39,10 @@ mkdir -p data/{train,test}/data if [ $stage -le 1 ]; then echo "$(date) stage 1: getting allowed image widths for e2e training..." - image/get_image2num_frames.py --feat-dim 40 data/train + image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train for set in train test; do echo "$(date) Extracting features, creating feats.scp file" @@ -91,6 +102,8 @@ fi if [ $stage -le 4 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh + # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. + # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang silphonelist=`cat data/lang/phones/silence.csl` @@ -173,5 +186,5 @@ fi if [ $stage -le 15 ]; then local/chain/run_cnn_chainali.sh --train_set train_aug \ - --chain-model-dir exp/chain/cnn_1a --stage 4 + --chain-model-dir exp/chain/cnn_1a --stage 2 fi diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index cf0d8476e55..346acbed1d3 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -6,7 +6,15 @@ stage=0 nj=20 username= password= +# iam_database points to the database path on the JHU grid. If you have not +# already downloaded the database you can set it to a local directory +# like "data/download" and follow the instructions +# in "local/prepare_data.sh" to download the database: iam_database=/export/corpora5/handwriting_ocr/IAM +# wellington_database points to the database path on the JHU grid. The Wellington +# corpus contains two directories WWC and WSC (Wellington Written and Spoken Corpus). 
+# This corpus is of written NZ English that can be purchased here: +# "https://www.victoria.ac.nz/lals/resources/corpora-default" wellington_database=/export/corpora5/Wellington/WWC/ . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. @@ -28,7 +36,10 @@ mkdir -p data/{train,test}/data if [ $stage -le 1 ]; then echo "$(date) stage 1: getting allowed image widths for e2e training..." - image/get_image2num_frames.py --feat-dim 40 data/train + image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train for set in train test; do echo "$(date) Extracting features, creating feats.scp file" @@ -88,6 +99,8 @@ fi if [ $stage -le 4 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh + # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. + # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang @@ -106,6 +119,7 @@ fi if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi From 5fc0d17914dcaf718c55c30455402a86a3bb0525 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sun, 2 Sep 2018 13:17:25 -0400 Subject: [PATCH 13/37] fixing chain scripts --- egs/iam/v2/local/chain/tuning/run_cnn_1a.sh | 2 +- .../local/chain/tuning/run_cnn_chainali_1b.sh | 12 +++++----- .../v2/local/chain/tuning/run_e2e_cnn_1b.sh | 22 +++++++++---------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh index e39b14ac8dc..6583e1725c3 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh @@ -186,7 +186,7 @@ if [ $stage -le 5 ]; then --chain.xent-regularize $xent_regularize \ --chain.leaky-hmm-coefficient=0.1 \ --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ + --chain.apply-deriv-weights=true \ --chain.lm-opts="--num-extra-lm-states=500" \ --chain.frame-subsampling-factor=$frame_subsampling_factor \ --chain.alignment-subsampling-factor=$frame_subsampling_factor \ diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh index 36a30b2df29..41b800c9136 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh @@ -42,10 +42,10 @@ chunk_right_context=0 tdnn_dim=550 # training options srand=0 -remove_egs=false -lang_test=lang +remove_egs=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g + dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. echo "$0 $@" # Print the command line for logging @@ -120,10 +120,11 @@ if [ $stage -le 3 ]; then # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use # those. 
The num-leaves is always somewhat less than the num-leaves from # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; fi + steps/nnet3/chain/build_tree.sh \ --frame-subsampling-factor $frame_subsampling_factor \ --context-opts "--context-width=2 --central-position=1" \ @@ -156,6 +157,7 @@ if [ $stage -le 4 ]; then relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh index a3b0d8c582f..0ffc6b78fa7 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -2,20 +2,20 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. from scratch) -# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ -# System e2e_cnn_1a -# WER 11.24 -# WER (rescored) 10.80 -# CER 5.32 -# CER (rescored) 5.24 -# Final train prob 0.0568 -# Final valid prob 0.0381 +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ +# System e2e_cnn_1b +# WER 13.59 +# WER (rescored) 13.27 +# CER 6.92 +# CER (rescored) 6.71 +# Final train prob 0.0345 +# Final valid prob 0.0269 # Final train prob (xent) # Final valid prob (xent) -# Parameters 9.13M +# Parameters 9.52M -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.1M dim=40->12640 combine=0.049->0.049 (over 1) logprob:train/valid[27,41,final]=(0.035,0.055,0.057/0.016,0.037,0.038) +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1b +# exp/chain/e2e_cnn_1b: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=0.041->0.041 (over 2) logprob:train/valid[27,41,final]=(0.032,0.035,0.035/0.025,0.026,0.027) set -e # configs for 'chain' From 1138ee31dfae5aa9824a809c86029f49e139f668 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sun, 2 Sep 2018 14:19:06 -0400 Subject: [PATCH 14/37] updating parameters --- egs/iam/v2/local/chain/tuning/run_cnn_1a.sh | 8 ++++++-- egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh index 6583e1725c3..fe19c16ff13 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh @@ -48,7 +48,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=false -lang_test=lang +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -226,7 +227,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -240,4 +241,7 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh index 41b800c9136..95c299f36db 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh @@ -144,7 +144,7 @@ if [ $stage -le 4 ]; then output_opts="l2-regularize=0.04" common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input From b3532ced75ba5c0518873a71181062f8e2766820 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 3 Sep 2018 19:05:09 -0400 Subject: [PATCH 15/37] updating parameters and results --- egs/iam/v2/local/chain/tuning/run_cnn_1a.sh | 21 ++++++----- .../local/chain/tuning/run_cnn_chainali_1b.sh | 34 ++++++++++-------- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 36 ++++++++++--------- .../v2/local/chain/tuning/run_e2e_cnn_1b.sh | 2 +- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh index fe19c16ff13..cf4024c9d16 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh @@ -5,18 +5,18 @@ # 2017 Ashish Arora # steps/info/chain_dir_info.pl exp/chain/cnn_1a/ -# exp/chain/cnn_1a/: num-iters=42 nj=2..4 num-params=4.4M dim=40->400 combine=-0.039->-0.039 (over 2) xent:train/valid[27,41,final]=(-0.502,-0.380,-0.376/-0.679,-0.626,-0.625) logprob:train/valid[27,41,final]=(-0.038,-0.032,-0.032/-0.063,-0.064,-0.064) +# exp/chain/cnn_1a/: num-iters=42 nj=2..4 num-params=4.4M dim=40->400 combine=-0.039->-0.039 (over 2) xent:train/valid[27,41,final]=(-0.547,-0.404,-0.401/-0.746,-0.685,-0.684) logprob:train/valid[27,41,final]=(-0.046,-0.036,-0.036/-0.072,-0.071,-0.071) # ./local/chain/compare_wer.sh exp/chain/cnn_1a/ # System cnn_1a -# WER 14.91 -# WER (rescored) -- -# CER 7.92 -# CER (rescored) -- -# Final train prob -0.0320 -# Final valid prob -0.0643 -# Final train prob (xent) -0.3762 -# Final valid prob (xent) -0.6247 +# WER 17.05 +# WER (rescored) 16.70 +# CER 9.75 +# CER (rescored) 9.61 +# Final train prob -0.0358 +# Final valid prob -0.0709 +# Final train prob (xent) -0.4013 +# Final valid prob (xent) -0.6841 # Parameters 4.39M set -e -o pipefail @@ -245,3 +245,6 @@ if [ $stage -le 7 ]; then steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). 
Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh index 95c299f36db..105b8f50854 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh @@ -1,19 +1,20 @@ #!/bin/bash -# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b -# System cnn_chainali_1b -# WER 9.49 -# WER (rescored) 9.27 -# CER 4.39 -# CER (rescored) 4.32 -# Final train prob -0.0466 -# Final valid prob -0.0692 -# Final train prob (xent) -0.4811 -# Final valid prob (xent) -0.5538 -# Parameters 5.65M +# ./local/chain/compare_wer.sh exp/chain/cnn_1a/ exp/chain/cnn_chainali_1b +# System cnn_1a cnn_chainali_1b +# WER 17.05 9.45 +# WER (rescored) 16.70 9.01 +# CER 9.75 4.43 +# CER (rescored) 9.61 4.28 +# Final train prob -0.0358 -0.0522 +# Final valid prob -0.0709 -0.0702 +# Final train prob (xent) -0.4013 -0.4992 +# Final valid prob (xent) -0.6841 -0.5658 +# Parameters 4.39M 5.13M # steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1b -# exp/chain/cnn_chainali_1d: num-iters=40 nj=2..4 num-params=5.7M dim=40->400 combine=-0.051->-0.051 (over 1) xent:train/valid[25,39,final]=(-0.818,-0.500,-0.481/-0.828,-0.570,-0.554) logprob:train/valid[25,39,final]=(-0.097,-0.050,-0.047/-0.114,-0.073,-0.069) +# exp/chain/cnn_chainali_1b/: num-iters=36 nj=3..5 num-params=5.1M dim=40->400 combine=-0.054->-0.054 (over 1) xent:train/valid[23,35,final]=(-0.769,-0.524,-0.499/-0.773,-0.584,-0.566) logprob:train/valid[23,35,final]=(-0.092,-0.056,-0.052/-0.107,-0.076,-0.070) + set -e -o pipefail stage=0 @@ -198,10 +199,10 @@ if [ $stage -le 5 ]; then --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=5 \ + --trainer.num-epochs=6 \ --trainer.frames-per-iter=1500000 \ - --trainer.optimization.num-jobs-initial=2 \ - --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=5 \ --trainer.dropout-schedule $dropout_schedule \ --trainer.optimization.initial-effective-lrate=0.001 \ --trainer.optimization.final-effective-lrate=0.0001 \ @@ -251,3 +252,6 @@ if [ $stage -le 7 ]; then steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 27988beafdd..e8287cf929d 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -1,20 +1,21 @@ #!/bin/bash # This script does end2end chain training (i.e. 
from scratch) -# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1d/ -# System cnn_e2eali_1d -# WER 9.52 -# WER (rescored) 9.29 -# CER 4.45 -# CER (rescored) 4.43 -# Final train prob -0.0473 -# Final valid prob -0.0706 -# Final train prob (xent) -0.4623 -# Final valid prob (xent) -0.5371 -# Parameters 5.08M +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ exp/chain/cnn_e2eali_1d +# System e2e_cnn_1a cnn_e2eali_1d +# WER 13.59 9.45 +# WER (rescored) 13.27 9.28 +# CER 6.92 4.41 +# CER (rescored) 6.71 4.31 +# Final train prob 0.0345 -0.0451 +# Final valid prob 0.0269 -0.0684 +# Final train prob (xent) -0.4241 +# Final valid prob (xent) -0.5068 +# Parameters 9.52M 5.13M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d -# exp/chain/cnn_e2eali_1d/: num-iters=40 nj=2..4 num-params=5.1M dim=40->400 combine=-0.052->-0.052 (over 1) xent:train/valid[25,39,final]=(-0.739,-0.483,-0.462/-0.763,-0.551,-0.537) logprob:train/valid[25,39,final]=(-0.092,-0.052,-0.047/-0.112,-0.076,-0.071) +# exp/chain/cnn_e2eali_1d/: num-iters=36 nj=3..5 num-params=5.1M dim=40->400 combine=-0.047->-0.047 (over 1) xent:train/valid[23,35,final]=(-0.705,-0.446,-0.424/-0.714,-0.523,-0.507) logprob:train/valid[23,35,final]=(-0.095,-0.049,-0.045/-0.110,-0.073,-0.068) + set -e -o pipefail stage=0 @@ -22,7 +23,7 @@ stage=0 nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1df #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a common_egs_dir= reporting_email= @@ -194,10 +195,10 @@ if [ $stage -le 5 ]; then --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=5 \ + --trainer.num-epochs=6 \ --trainer.frames-per-iter=1500000 \ - --trainer.optimization.num-jobs-initial=2 \ - --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=5 \ --trainer.dropout-schedule $dropout_schedule \ --trainer.optimization.initial-effective-lrate=0.001 \ --trainer.optimization.final-effective-lrate=0.0001 \ @@ -247,3 +248,6 @@ if [ $stage -le 7 ]; then steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). 
Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh index 0ffc6b78fa7..7fb81c97ea7 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -22,7 +22,7 @@ set -e stage=0 train_stage=-10 get_egs_stage=-10 -affix=1a +affix=1b # training options tdnn_dim=450 From 9b67d9d0ad7600c25d5237549497b54ed4b778ff Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 3 Sep 2018 19:26:23 -0400 Subject: [PATCH 16/37] adding overwrite option and punctuation topology --- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 2 +- egs/iam/v2/local/prepare_data.sh | 19 ++++++++++++------- egs/iam/v2/run.sh | 4 +++- egs/iam/v2/run_end2end.sh | 9 +++++++-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index e8287cf929d..9771245c683 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -23,7 +23,7 @@ stage=0 nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1df #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a common_egs_dir= reporting_email= diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh index 9c01ac90f28..32e76143931 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -18,6 +18,7 @@ stage=0 download_dir=data/download +overwrite=false wellington_dir= username= password= # username and password for downloading the IAM database @@ -161,11 +162,15 @@ cat $test_old > $test_new cat $val1_old $val2_old > $val_new if [ $stage -le 0 ]; then - local/process_data.py data/local data/train --dataset train || exit 1 - local/process_data.py data/local data/test --dataset test || exit 1 - local/process_data.py data/local data/val --dataset validation || exit 1 - - image/fix_data_dir.sh data/train - image/fix_data_dir.sh data/test - image/fix_data_dir.sh data/val + if [ ! -f data/train/text ] || $overwrite; then + local/process_data.py data/local data/train --dataset train || exit 1 + local/process_data.py data/local data/test --dataset test || exit 1 + local/process_data.py data/local data/val --dataset validation || exit 1 + + image/fix_data_dir.sh data/train + image/fix_data_dir.sh data/test + image/fix_data_dir.sh data/val + else + echo "Not processing data since it is already processed" + fi fi diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index dcdbb92ed68..8a56c35e052 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -10,6 +10,7 @@ nj=70 decode_gmm=false username= password= +overwrite=false # iam_database points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # like "data/download" and follow the instructions @@ -33,7 +34,8 @@ if [ $stage -le 0 ]; then echo "$0: Preparing data..." 
local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ - --username "$username" --password "$password" + --username "$username" --password "$password" \ + --overwrite $overwrite fi mkdir -p data/{train,test}/data diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index 346acbed1d3..dd6aa73de63 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -6,6 +6,7 @@ stage=0 nj=20 username= password= +overwrite=false # iam_database points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # like "data/download" and follow the instructions @@ -30,10 +31,11 @@ if [ $stage -le 0 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ - --username "$username" --password "$password" + --username "$username" --password "$password" \ + --overwrite $overwrite fi -mkdir -p data/{train,test}/data +mkdir -p data/{train,test}/data if [ $stage -le 1 ]; then echo "$(date) stage 1: getting allowed image widths for e2e training..." image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command @@ -103,6 +105,9 @@ if [ $stage -le 4 ]; then # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang + silphonelist=`cat data/lang/phones/silence.csl` + nonsilphonelist=`cat data/lang/phones/nonsilence.csl` + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ From 89c9ec79ff7dce369d1c5e1c030ef225bae053e2 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 3 Sep 2018 20:26:57 -0400 Subject: [PATCH 17/37] adding overwrite option --- egs/iam/v2/run.sh | 44 +++++++++++++++++++++++---------------- egs/iam/v2/run_end2end.sh | 28 ++++++++++++++++--------- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 8a56c35e052..44a85928d63 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -31,6 +31,14 @@ wellington_database=/export/corpora5/Wellington/WWC/ ./local/check_tools.sh if [ $stage -le 0 ]; then + if [ -f data/train/text ] && ! $overwrite; then + echo "Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi +fi + +if [ $stage -le 1 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ @@ -39,8 +47,8 @@ if [ $stage -le 0 ]; then fi mkdir -p data/{train,test}/data -if [ $stage -le 1 ]; then - echo "$(date) stage 1: getting allowed image widths for e2e training..." +if [ $stage -le 2 ]; then + echo "$(date) stage 2: getting allowed image widths for e2e training..." 
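  # Rough illustration (not part of the recipe) of the length spacing the next two
  # commands aim for: allowed frame counts grow by about 10% and are kept
  # compatible with the frame-subsampling factor of 4, e.g.
  #   awk 'BEGIN{w=100; while (w<1000) {printf "%d ", 4*int(w/4); w*=1.10}; print ""}'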
image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to @@ -54,15 +62,15 @@ if [ $stage -le 1 ]; then image/fix_data_dir.sh data/train fi -if [ $stage -le 2 ]; then +if [ $stage -le 3 ]; then for set in train; do - echo "$(date) stage 2: Performing augmentation, it will double training data" + echo "$(date) stage 3: Performing augmentation, it will double training data" local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done fi -if [ $stage -le 2 ]; then +if [ $stage -le 4 ]; then echo "$0: Preparing BPE..." # getting non-silence phones. cut -d' ' -f2- data/train/text | \ @@ -96,12 +104,12 @@ END done fi -if [ $stage -le 3 ]; then +if [ $stage -le 5 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh fi -if [ $stage -le 4 ]; then +if [ $stage -le 6 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. @@ -119,19 +127,19 @@ if [ $stage -le 4 ]; then data/lang data/lang_rescore_6g fi -if [ $stage -le 5 ]; then +if [ $stage -le 7 ]; then steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train_aug \ data/lang exp/mono fi -if [ $stage -le 6 ] && $decode_gmm; then +if [ $stage -le 8 ] && $decode_gmm; then utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ exp/mono/decode_test fi -if [ $stage -le 7 ]; then +if [ $stage -le 9 ]; then steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ exp/mono exp/mono_ali @@ -139,14 +147,14 @@ if [ $stage -le 7 ]; then exp/mono_ali exp/tri fi -if [ $stage -le 8 ] && $decode_gmm; then +if [ $stage -le 10 ] && $decode_gmm; then utils/mkgraph.sh data/lang exp/tri exp/tri/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ exp/tri/decode_test fi -if [ $stage -le 9 ]; then +if [ $stage -le 11 ]; then steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ exp/tri exp/tri_ali @@ -155,14 +163,14 @@ if [ $stage -le 9 ]; then data/train_aug data/lang exp/tri_ali exp/tri2 fi -if [ $stage -le 10 ] && $decode_gmm; then +if [ $stage -le 12 ] && $decode_gmm; then utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri2/graph \ data/test exp/tri2/decode_test fi -if [ $stage -le 11 ]; then +if [ $stage -le 13 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ data/train_aug data/lang exp/tri2 exp/tri2_ali @@ -170,23 +178,23 @@ if [ $stage -le 11 ]; then data/train_aug data/lang exp/tri2_ali exp/tri3 fi -if [ $stage -le 12 ] && $decode_gmm; then +if [ $stage -le 14 ] && $decode_gmm; then utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph steps/decode_fmllr.sh --nj $nj --cmd $cmd exp/tri3/graph \ data/test exp/tri3/decode_test fi -if [ $stage -le 13 ]; then +if [ $stage -le 15 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ data/train_aug data/lang exp/tri3 exp/tri3_ali fi -if [ $stage -le 14 ]; then +if [ $stage -le 16 ]; then local/chain/run_cnn.sh --train_set train_aug fi -if [ $stage -le 15 ]; then +if [ $stage -le 17 ]; then local/chain/run_cnn_chainali.sh --train_set train_aug \ --chain-model-dir exp/chain/cnn_1a --stage 2 fi diff --git a/egs/iam/v2/run_end2end.sh 
b/egs/iam/v2/run_end2end.sh index dd6aa73de63..a5e8906e406 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -28,6 +28,14 @@ wellington_database=/export/corpora5/Wellington/WWC/ ./local/check_tools.sh if [ $stage -le 0 ]; then + if [ -f data/train/text ] && ! $overwrite; then + echo "Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi +fi + +if [ $stage -le 1 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ @@ -36,8 +44,8 @@ if [ $stage -le 0 ]; then fi mkdir -p data/{train,test}/data -if [ $stage -le 1 ]; then - echo "$(date) stage 1: getting allowed image widths for e2e training..." +if [ $stage -le 2 ]; then + echo "$(date) stage 2: getting allowed image widths for e2e training..." image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to @@ -51,15 +59,15 @@ if [ $stage -le 1 ]; then image/fix_data_dir.sh data/train fi -if [ $stage -le 2 ]; then +if [ $stage -le 3 ]; then for set in train; do - echo "$(date) stage 2: Performing augmentation, it will double training data" + echo "$(date) stage 3: Performing augmentation, it will double training data" local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done fi -if [ $stage -le 2 ]; then +if [ $stage -le 4 ]; then echo "$0: Preparing BPE..." # getting non-silence phones. cut -d' ' -f2- data/train/text | \ @@ -93,12 +101,12 @@ END done fi -if [ $stage -le 3 ]; then +if [ $stage -le 5 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh fi -if [ $stage -le 4 ]; then +if [ $stage -le 6 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. @@ -116,12 +124,12 @@ if [ $stage -le 4 ]; then data/lang data/lang_rescore_6g fi -if [ $stage -le 5 ]; then +if [ $stage -le 7 ]; then echo "$0: Calling the flat-start chain recipe..." local/chain/run_e2e_cnn.sh --train_set train_aug fi -if [ $stage -le 6 ]; then +if [ $stage -le 8 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ @@ -129,7 +137,7 @@ if [ $stage -le 6 ]; then data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 7 ]; then +if [ $stage -le 9 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments..." 
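  # Invocation sketch (the tuning-script name and model directory are the defaults
  # assumed by this recipe, shown only for illustration): the wrapper call on the
  # next line is roughly equivalent to
  #   local/chain/tuning/run_cnn_e2eali_1d.sh --train_set train_aug \
  #     --e2echain_model_dir exp/chain/e2e_cnn_1a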
local/chain/run_cnn_e2eali.sh --train_set train_aug fi From c05cd4df19953c65f76c09827ffa47513aa6953c Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 4 Sep 2018 00:05:25 -0400 Subject: [PATCH 18/37] adding aachen splits --- egs/iam/v2/local/prepare_data.sh | 18 ++--- egs/iam/v2/local/process_aachen_splits.py | 88 +++++++++++++++++++++++ egs/iam/v2/local/process_data.py | 1 - 3 files changed, 97 insertions(+), 10 deletions(-) create mode 100755 egs/iam/v2/local/process_aachen_splits.py diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh index 32e76143931..a220c2725b1 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -161,16 +161,16 @@ cat $train_old > $train_new cat $test_old > $test_new cat $val1_old $val2_old > $val_new -if [ $stage -le 0 ]; then - if [ ! -f data/train/text ] || $overwrite; then +if $process_aachen_split; then + local/process_aachen_splits.py data/local aachen_split data/train --dataset train || exit 1 + local/process_aachen_splits.py data/local aachen_split data/test --dataset test || exit 1 + local/process_aachen_splits.py data/local aachen_split data/val --dataset validation || exit 1 +else local/process_data.py data/local data/train --dataset train || exit 1 local/process_data.py data/local data/test --dataset test || exit 1 local/process_data.py data/local data/val --dataset validation || exit 1 - - image/fix_data_dir.sh data/train - image/fix_data_dir.sh data/test - image/fix_data_dir.sh data/val - else - echo "Not processing data since it is already processed" - fi fi + +image/fix_data_dir.sh data/train +image/fix_data_dir.sh data/test +image/fix_data_dir.sh data/val diff --git a/egs/iam/v2/local/process_aachen_splits.py b/egs/iam/v2/local/process_aachen_splits.py new file mode 100755 index 00000000000..b8c59d0a7c8 --- /dev/null +++ b/egs/iam/v2/local/process_aachen_splits.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +""" This script reads the extracted IAM database files and creates + the following files (for the data subset selected via --dataset): + text, utt2spk, images.scp. + + Eg. local/process_aachen_splits.py data/local data/train data --dataset train + Eg. text file: 000_a01-000u-00 A MOVE to stop Mr. 
Gaitskell from + utt2spk file: 000_a01-000u-00 000 + images.scp file: 000_a01-000u-00 data/local/lines/a01/a01-000u/a01-000u-00.png +""" + +import argparse +import os +import sys +import xml.dom.minidom as minidom + +parser = argparse.ArgumentParser(description="""Creates text, utt2spk + and images.scp files.""") +parser.add_argument('database_path', type=str, + help='Path to the downloaded (and extracted) IAM data') +parser.add_argument('split_path', type=str, + help='location of the train/test/val set') +parser.add_argument('out_dir', type=str, + help='location to write output files.') +parser.add_argument('--dataset', type=str, default='train', + choices=['train_list', 'dev_list', 'eval_list'], + help='Subset of data to process.') +args = parser.parse_args() + +text_file = os.path.join(args.out_dir + '/', 'text') +text_fh = open(text_file, 'w') + +utt2spk_file = os.path.join(args.out_dir + '/', 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w') + +image_file = os.path.join(args.out_dir + '/', 'images.scp') +image_fh = open(image_file, 'w') + +dataset_path = os.path.join(args.split_path, + args.dataset + '.txt') + +text_file_path = os.path.join(args.database_path, + 'ascii','lines.txt') +text_dict = {} +def process_text_file_for_word_model(): + with open (text_file_path, 'rt') as in_file: + for line in in_file: + if line[0]=='#': + continue + line = line.strip() + utt_id = line.split(' ')[0] + text_vect = line.split(' ')[8:] + text = "".join(text_vect) + text = text.replace("|", " ") + text_dict[utt_id] = text + + +### main ### + +print("Processing '{}' data...".format(args.dataset)) +process_text_file_for_word_model() + +with open(dataset_path) as f: + for line in f: + line = line.strip() + line_vect = line.split('-') + xml_file = line_vect[0] + '-' + line_vect[1] + xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml') + doc = minidom.parse(xml_path) + form_elements = doc.getElementsByTagName('form')[0] + writer_id = form_elements.getAttribute('writer-id') + outerfolder = form_elements.getAttribute('id')[0:3] + innerfolder = form_elements.getAttribute('id') + lines_path = os.path.join(args.database_path, 'lines', + outerfolder, innerfolder) + for file in os.listdir(lines_path): + if file.endswith(".png"): + image_file_path = os.path.join(lines_path, file) + base_name = os.path.splitext(os.path.basename(image_file_path))[0] + text = text_dict[base_name] + utt_id = writer_id + '_' + base_name + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') diff --git a/egs/iam/v2/local/process_data.py b/egs/iam/v2/local/process_data.py index fa5eb484707..2adae7bf7be 100755 --- a/egs/iam/v2/local/process_data.py +++ b/egs/iam/v2/local/process_data.py @@ -67,7 +67,6 @@ def process_text_file_for_word_model(): xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml') img_num = line[-3:] doc = minidom.parse(xml_path) - form_elements = doc.getElementsByTagName('form')[0] writer_id = form_elements.getAttribute('writer-id') outerfolder = form_elements.getAttribute('id')[0:3] From 5dfe8fcbb5fcd2d85249e3b46a1f1d2f9bf9a07f Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 4 Sep 2018 00:59:40 -0400 Subject: [PATCH 19/37] fixing bugs --- egs/iam/v2/local/prepare_data.sh | 8 ++++---- egs/iam/v2/local/process_aachen_splits.py | 4 ++-- egs/iam/v2/run.sh | 4 ++-- egs/iam/v2/run_end2end.sh | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/egs/iam/v2/local/prepare_data.sh 
b/egs/iam/v2/local/prepare_data.sh index a220c2725b1..8c2505601cc 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -18,7 +18,7 @@ stage=0 download_dir=data/download -overwrite=false +process_aachen_split=false wellington_dir= username= password= # username and password for downloading the IAM database @@ -162,9 +162,9 @@ cat $test_old > $test_new cat $val1_old $val2_old > $val_new if $process_aachen_split; then - local/process_aachen_splits.py data/local aachen_split data/train --dataset train || exit 1 - local/process_aachen_splits.py data/local aachen_split data/test --dataset test || exit 1 - local/process_aachen_splits.py data/local aachen_split data/val --dataset validation || exit 1 + local/process_aachen_splits.py data/local extra/splits data/train --dataset train || exit 1 + local/process_aachen_splits.py data/local extra/splits data/test --dataset test || exit 1 + local/process_aachen_splits.py data/local extra/splits data/val --dataset validation || exit 1 else local/process_data.py data/local data/train --dataset train || exit 1 local/process_data.py data/local data/test --dataset test || exit 1 diff --git a/egs/iam/v2/local/process_aachen_splits.py b/egs/iam/v2/local/process_aachen_splits.py index b8c59d0a7c8..cb6a6d4f0d8 100755 --- a/egs/iam/v2/local/process_aachen_splits.py +++ b/egs/iam/v2/local/process_aachen_splits.py @@ -27,7 +27,7 @@ parser.add_argument('out_dir', type=str, help='location to write output files.') parser.add_argument('--dataset', type=str, default='train', - choices=['train_list', 'dev_list', 'eval_list'], + choices=['train', 'test','validation'], help='Subset of data to process.') args = parser.parse_args() @@ -41,7 +41,7 @@ image_fh = open(image_file, 'w') dataset_path = os.path.join(args.split_path, - args.dataset + '.txt') + args.dataset + '.uttlist') text_file_path = os.path.join(args.database_path, 'ascii','lines.txt') diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh index 44a85928d63..41c6fdc1aec 100755 --- a/egs/iam/v2/run.sh +++ b/egs/iam/v2/run.sh @@ -10,7 +10,7 @@ nj=70 decode_gmm=false username= password= -overwrite=false +process_aachen_split=false # iam_database points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # like "data/download" and follow the instructions @@ -43,7 +43,7 @@ if [ $stage -le 1 ]; then local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ --username "$username" --password "$password" \ - --overwrite $overwrite + --process_aachen_split $process_aachen_split fi mkdir -p data/{train,test}/data diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index a5e8906e406..6ecca67bb9d 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -6,7 +6,7 @@ stage=0 nj=20 username= password= -overwrite=false +process_aachen_split=false # iam_database points to the database path on the JHU grid. 
If you have not # already downloaded the database you can set it to a local directory # like "data/download" and follow the instructions @@ -40,7 +40,7 @@ if [ $stage -le 1 ]; then local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ --username "$username" --password "$password" \ - --overwrite $overwrite + --process_aachen_split $process_aachen_split fi mkdir -p data/{train,test}/data From d7448dfb20fca39df15270d616a09a141ae66fa9 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 5 Sep 2018 07:43:44 -0400 Subject: [PATCH 20/37] modification from review --- egs/cifar/v1/image/copy_data_dir.sh | 118 -------- egs/iam/v2/local/chain/run_cnn.sh | 1 - egs/iam/v2/local/chain/run_cnn_chainali.sh | 1 - egs/iam/v2/local/chain/tuning/run_cnn_1a.sh | 250 ----------------- .../local/chain/tuning/run_cnn_chainali_1a.sh | 250 ----------------- .../local/chain/tuning/run_cnn_chainali_1b.sh | 257 ------------------ .../local/chain/tuning/run_cnn_e2eali_1d.sh | 2 +- egs/iam/v2/run.sh | 200 -------------- egs/iam/v2/run_end2end.sh | 3 +- 9 files changed, 3 insertions(+), 1079 deletions(-) delete mode 100755 egs/cifar/v1/image/copy_data_dir.sh delete mode 120000 egs/iam/v2/local/chain/run_cnn.sh delete mode 120000 egs/iam/v2/local/chain/run_cnn_chainali.sh delete mode 100755 egs/iam/v2/local/chain/tuning/run_cnn_1a.sh delete mode 100755 egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh delete mode 100755 egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh delete mode 100755 egs/iam/v2/run.sh diff --git a/egs/cifar/v1/image/copy_data_dir.sh b/egs/cifar/v1/image/copy_data_dir.sh deleted file mode 100755 index c923f5cc07a..00000000000 --- a/egs/cifar/v1/image/copy_data_dir.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Johns Hopkins University (author: Daniel Povey) -# Apache 2.0 - -# This script operates on a directory, such as in data/train/, -# that contains some subset of the following files: -# feats.scp -# images.scp -# vad.scp -# spk2utt -# utt2spk -# text -# -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance and/or speaker names. Note, the recording-ids stay the same. -# - - -# begin configuration section -spk_prefix= -utt_prefix= -spk_suffix= -utt_suffix= -validate_opts= # should rarely be needed. -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" - echo "Options" - echo " --spk-prefix= # Prefix for speaker ids, default empty" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --spk-suffix= # Suffix for speaker ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -srcdir=$1 -destdir=$2 - -if [ ! -f $srcdir/utt2spk ]; then - echo "copy_data_dir.sh: no such file $srcdir/utt2spk" - exit 1; -fi - -if [ "$destdir" == "$srcdir" ]; then - echo "$0: this script requires and to be different." - exit 1 -fi - -set -e; - -mkdir -p $destdir - -cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map -cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map - -if [ ! -f $srcdir/utt2uniq ]; then - if [[ ! -z $utt_prefix || ! 
-z $utt_suffix ]]; then - cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq - fi -else - cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq -fi - -cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ - utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk - -utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt - -if [ -f $srcdir/feats.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp -fi - -if [ -f $srcdir/vad.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp -fi - -if [ -f $srcdir/images.scp ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp -fi - -if [ -f $srcdir/text ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text -fi -if [ -f $srcdir/utt2dur ]; then - utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur -fi -if [ -f $srcdir/cmvn.scp ]; then - utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp -fi - -rm $destdir/spk_map $destdir/utt_map - -echo "$0: copied data from $srcdir to $destdir" - -for f in feats.scp cmvn.scp vad.scp utt2uniq utt2dur utt2num_frames text images.scp; do - if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then - echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" - echo " ... $destdir/.backup/$f" - mkdir -p $destdir/.backup - mv $destdir/$f $destdir/.backup/ - fi -done - - -[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" -[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" - -utils/validate_data_dir.sh $validate_opts $destdir diff --git a/egs/iam/v2/local/chain/run_cnn.sh b/egs/iam/v2/local/chain/run_cnn.sh deleted file mode 120000 index df6f0a468c1..00000000000 --- a/egs/iam/v2/local/chain/run_cnn.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_cnn_1a.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/run_cnn_chainali.sh b/egs/iam/v2/local/chain/run_cnn_chainali.sh deleted file mode 120000 index 86568421fe1..00000000000 --- a/egs/iam/v2/local/chain/run_cnn_chainali.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_cnn_chainali_1b.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh deleted file mode 100755 index cf4024c9d16..00000000000 --- a/egs/iam/v2/local/chain/tuning/run_cnn_1a.sh +++ /dev/null @@ -1,250 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Hossein Hadian -# 2017 Chun Chieh Chang -# 2017 Ashish Arora - -# steps/info/chain_dir_info.pl exp/chain/cnn_1a/ -# exp/chain/cnn_1a/: num-iters=42 nj=2..4 num-params=4.4M dim=40->400 combine=-0.039->-0.039 (over 2) xent:train/valid[27,41,final]=(-0.547,-0.404,-0.401/-0.746,-0.685,-0.684) logprob:train/valid[27,41,final]=(-0.046,-0.036,-0.036/-0.072,-0.071,-0.071) - -# ./local/chain/compare_wer.sh exp/chain/cnn_1a/ -# System cnn_1a -# WER 17.05 -# WER (rescored) 16.70 -# CER 9.75 -# CER (rescored) 9.61 -# Final train prob -0.0358 -# Final valid prob -0.0709 -# Final train prob (xent) -0.4013 -# Final valid prob (xent) -0.6841 -# Parameters 4.39M - -set -e -o pipefail - -stage=0 - -nj=30 -train_set=train -gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. 
-affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -ali=tri3_ali -common_egs_dir= -reporting_email= - -# chain options -train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -alignment_subsampling_factor=1 -# training chunk-options -chunk_width=340,300,200,100 -num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 -tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$cmd" ${train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn4 dim=$tdnn_dim target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=true \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=$frame_subsampling_factor \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=2 \ - --trainer.optimization.num-jobs-final=4 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ - --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). 
Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh deleted file mode 100755 index 07bdac88468..00000000000 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1a.sh +++ /dev/null @@ -1,250 +0,0 @@ -#!/bin/bash - -# ./local/chain/compare_wer.sh exp/chain/cnn_chainali_1a/ -# System cnn_chainali_1a -# WER 10.48 -# WER (rescored) 10.23 -# CER 4.82 -# CER (rescored) 4.69 -# Final train prob -0.0444 -# Final valid prob -0.0645 -# Final train prob (xent) -0.4523 -# Final valid prob (xent) -0.5350 -# Parameters 5.65M - -set -e -o pipefail - -stage=0 - -nj=30 -train_set=train -gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -ali=tri3_ali -chain_model_dir=exp/chain${nnet3_affix}/cnn_1a -common_egs_dir= -reporting_email= - -# chain options -train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options -chunk_width=340,300,200,100 -num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 -tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $chain_model_dir $lat_dir - cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - cnn_opts="l2-regularize=0.075" - tdnn_opts="l2-regularize=0.075" - output_opts="l2-regularize=0.1" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 3 \ - --chain.right-tolerance 3 \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=2 \ - --trainer.optimization.num-jobs-final=4 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ - --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. 
- - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh deleted file mode 100755 index 105b8f50854..00000000000 --- a/egs/iam/v2/local/chain/tuning/run_cnn_chainali_1b.sh +++ /dev/null @@ -1,257 +0,0 @@ -#!/bin/bash - -# ./local/chain/compare_wer.sh exp/chain/cnn_1a/ exp/chain/cnn_chainali_1b -# System cnn_1a cnn_chainali_1b -# WER 17.05 9.45 -# WER (rescored) 16.70 9.01 -# CER 9.75 4.43 -# CER (rescored) 9.61 4.28 -# Final train prob -0.0358 -0.0522 -# Final valid prob -0.0709 -0.0702 -# Final train prob (xent) -0.4013 -0.4992 -# Final valid prob (xent) -0.6841 -0.5658 -# Parameters 4.39M 5.13M - -# steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1b -# exp/chain/cnn_chainali_1b/: num-iters=36 nj=3..5 num-params=5.1M dim=40->400 combine=-0.054->-0.054 (over 1) xent:train/valid[23,35,final]=(-0.769,-0.524,-0.499/-0.773,-0.584,-0.566) logprob:train/valid[23,35,final]=(-0.092,-0.056,-0.052/-0.107,-0.076,-0.070) - -set -e -o pipefail - -stage=0 - -nj=30 -train_set=train -gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -ali=tri3_ali -chain_model_dir=exp/chain${nnet3_affix}/cnn_1a -common_egs_dir= -reporting_email= - -# chain options -train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options -chunk_width=340,300,200,100 -num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 -tdnn_dim=550 -# training options -srand=0 -remove_egs=true -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g - -dropout_schedule='0,0@0.20,0.2@0.50,0' -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $chain_model_dir $lat_dir - cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" - tdnn_opts="l2-regularize=0.03" - output_opts="l2-regularize=0.04" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 - relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=true \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 3 \ - --chain.right-tolerance 3 \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=6 \ - --trainer.frames-per-iter=1500000 \ - --trainer.optimization.num-jobs-initial=3 \ - --trainer.optimization.num-jobs-final=5 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=32,16 \ - --trainer.optimization.momentum=0.0 \ - --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 9771245c683..3ed5dd745e5 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -24,7 +24,7 @@ nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
-e2echain_model_dir=exp/chain/e2e_cnn_1a +e2echain_model_dir=exp/chain/e2e_cnn_1b common_egs_dir= reporting_email= diff --git a/egs/iam/v2/run.sh b/egs/iam/v2/run.sh deleted file mode 100755 index 41c6fdc1aec..00000000000 --- a/egs/iam/v2/run.sh +++ /dev/null @@ -1,200 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2017 Hossein Hadian - -set -e -stage=0 -nj=70 -decode_gmm=false -username= -password= -process_aachen_split=false -# iam_database points to the database path on the JHU grid. If you have not -# already downloaded the database you can set it to a local directory -# like "data/download" and follow the instructions -# in "local/prepare_data.sh" to download the database: -iam_database=/export/corpora5/handwriting_ocr/IAM -# wellington_database points to the database path on the JHU grid. The Wellington -# corpus contains two directories WWC and WSC (Wellington Written and Spoken Corpus). -# This corpus is of written NZ English that can be purchased here: -# "https://www.victoria.ac.nz/lals/resources/corpora-default" -wellington_database=/export/corpora5/Wellington/WWC/ - -. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. - ## This relates to the queue. -. ./path.sh -. ./utils/parse_options.sh # e.g. this parses the above options - # if supplied. - -./local/check_tools.sh - -if [ $stage -le 0 ]; then - if [ -f data/train/text ] && ! $overwrite; then - echo "Not processing, probably script have run from wrong stage" - echo "Exiting with status 1 to avoid data corruption" - exit 1; - fi -fi - -if [ $stage -le 1 ]; then - echo "$0: Preparing data..." - local/prepare_data.sh --download-dir "$iam_database" \ - --wellington-dir "$wellington_database" \ - --username "$username" --password "$password" \ - --process_aachen_split $process_aachen_split -fi -mkdir -p data/{train,test}/data - -if [ $stage -le 2 ]; then - echo "$(date) stage 2: getting allowed image widths for e2e training..." - image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in train test; do - echo "$(date) Extracting features, creating feats.scp file" - local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} - steps/compute_cmvn_stats.sh data/${set} || exit 1; - done - image/fix_data_dir.sh data/train -fi - -if [ $stage -le 3 ]; then - for set in train; do - echo "$(date) stage 3: Performing augmentation, it will double training data" - local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data - steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; - done -fi - -if [ $stage -le 4 ]; then - echo "$0: Preparing BPE..." - # getting non-silence phones. 
- cut -d' ' -f2- data/train/text | \ -python3 <( -cat << "END" -import os, sys, io; -infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); -output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); -phone_dict = dict(); -for line in infile: - line_vect = line.strip().split(); - for word in line_vect: - for phone in word: - phone_dict[phone] = phone; -for phone in phone_dict.keys(): - output.write(phone+ '\n'); -END - ) > data/local/phones.txt - - cut -d' ' -f2- data/train/text > data/local/train_data.txt - cat data/local/phones.txt data/local/train_data.txt | \ - local/prepend_words.py | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - for set in test train val train_aug; do - cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > data/$set/bpe_text - mv data/$set/text data/$set/text.old - paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - done -fi - -if [ $stage -le 5 ]; then - echo "$0: Estimating a language model for decoding..." - local/train_lm.sh -fi - -if [ $stage -le 6 ]; then - echo "$0: Preparing dictionary and lang..." - local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. - # So we set --sil-prob to 0.0 - utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ - data/local/dict "" data/lang/temp data/lang - silphonelist=`cat data/lang/phones/silence.csl` - nonsilphonelist=`cat data/lang/phones/nonsilence.csl` - local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo - utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang - - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ - data/local/dict/lexicon.txt data/lang - utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/lang data/lang_rescore_6g -fi - -if [ $stage -le 7 ]; then - steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train_aug \ - data/lang exp/mono -fi - -if [ $stage -le 8 ] && $decode_gmm; then - utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph - - steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ - exp/mono/decode_test -fi - -if [ $stage -le 9 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ - exp/mono exp/mono_ali - - steps/train_deltas.sh --cmd $cmd 500 20000 data/train_aug data/lang \ - exp/mono_ali exp/tri -fi - -if [ $stage -le 10 ] && $decode_gmm; then - utils/mkgraph.sh data/lang exp/tri exp/tri/graph - - steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ - exp/tri/decode_test -fi - -if [ $stage -le 11 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train_aug data/lang \ - exp/tri exp/tri_ali - - steps/train_lda_mllt.sh --cmd $cmd \ - --splice-opts "--left-context=3 --right-context=3" 500 20000 \ - data/train_aug data/lang exp/tri_ali exp/tri2 -fi - -if [ $stage -le 12 ] && $decode_gmm; then - utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph - - steps/decode.sh --nj $nj --cmd $cmd exp/tri2/graph \ - data/test exp/tri2/decode_test -fi - -if [ $stage -le 13 ]; then - steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train_aug data/lang exp/tri2 exp/tri2_ali - - steps/train_sat.sh --cmd $cmd 500 20000 \ - data/train_aug data/lang exp/tri2_ali exp/tri3 -fi - -if [ $stage -le 14 ] && $decode_gmm; then - 
utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph - - steps/decode_fmllr.sh --nj $nj --cmd $cmd exp/tri3/graph \ - data/test exp/tri3/decode_test -fi - -if [ $stage -le 15 ]; then - steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train_aug data/lang exp/tri3 exp/tri3_ali -fi - -if [ $stage -le 16 ]; then - local/chain/run_cnn.sh --train_set train_aug -fi - -if [ $stage -le 17 ]; then - local/chain/run_cnn_chainali.sh --train_set train_aug \ - --chain-model-dir exp/chain/cnn_1a --stage 2 -fi diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index 6ecca67bb9d..cfc4653f24b 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -7,6 +7,7 @@ nj=20 username= password= process_aachen_split=false +overwrite=false # iam_database points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # like "data/download" and follow the instructions @@ -134,7 +135,7 @@ if [ $stage -le 8 ]; then steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train + data/train_aug data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train fi if [ $stage -le 9 ]; then From d7d5c22b63c6c4fe5a964bfb1768f9dba61f5c0d Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 6 Sep 2018 02:12:24 -0400 Subject: [PATCH 21/37] updating parameter and result --- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 3ed5dd745e5..bf70991ec2a 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -1,21 +1,20 @@ #!/bin/bash # This script does end2end chain training (i.e. 
from scratch) -# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ exp/chain/cnn_e2eali_1d -# System e2e_cnn_1a cnn_e2eali_1d -# WER 13.59 9.45 -# WER (rescored) 13.27 9.28 -# CER 6.92 4.41 -# CER (rescored) 6.71 4.31 -# Final train prob 0.0345 -0.0451 -# Final valid prob 0.0269 -0.0684 -# Final train prob (xent) -0.4241 -# Final valid prob (xent) -0.5068 -# Parameters 9.52M 5.13M +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d +# System e2e_cnn_1b cnn_e2eali_1d +# WER 13.91 9.59 +# WER (rescored) 13.64 9.09 +# CER 7.08 4.49 +# CER (rescored) 6.82 4.35 +# Final train prob 0.0148 -0.0504 +# Final valid prob 0.0105 -0.0716 +# Final train prob (xent) -0.4695 +# Final valid prob (xent) -0.5347 +# Parameters 9.52M 5.08M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d -# exp/chain/cnn_e2eali_1d/: num-iters=36 nj=3..5 num-params=5.1M dim=40->400 combine=-0.047->-0.047 (over 1) xent:train/valid[23,35,final]=(-0.705,-0.446,-0.424/-0.714,-0.523,-0.507) logprob:train/valid[23,35,final]=(-0.095,-0.049,-0.045/-0.110,-0.073,-0.068) - +# exp/chain/cnn_e2eali_1d: num-iters=24 nj=3..5 num-params=5.1M dim=40->400 combine=-0.054->-0.054 (over 1) xent:train/valid[15,23,final]=(-0.727,-0.497,-0.470/-0.734,-0.557,-0.535) logprob:train/valid[15,23,final]=(-0.093,-0.057,-0.050/-0.110,-0.078,-0.072) set -e -o pipefail stage=0 @@ -196,7 +195,7 @@ if [ $stage -le 5 ]; then --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=6 \ - --trainer.frames-per-iter=1500000 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=5 \ --trainer.dropout-schedule $dropout_schedule \ From 43e9af9601992e37f7985611c4b404115c3a13cf Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 6 Sep 2018 12:13:43 -0400 Subject: [PATCH 22/37] updating parameter and result --- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index bf70991ec2a..d1930efefd7 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -1,20 +1,20 @@ #!/bin/bash # This script does end2end chain training (i.e. 
from scratch) -# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d # System e2e_cnn_1b cnn_e2eali_1d -# WER 13.91 9.59 -# WER (rescored) 13.64 9.09 -# CER 7.08 4.49 -# CER (rescored) 6.82 4.35 -# Final train prob 0.0148 -0.0504 -# Final valid prob 0.0105 -0.0716 +# WER 13.91 9.32 +# WER (rescored) 13.64 9.07 +# CER 7.08 4.35 +# CER (rescored) 6.82 4.24 +# Final train prob 0.0148 -0.0524 +# Final valid prob 0.0105 -0.0713 # Final train prob (xent) -0.4695 -# Final valid prob (xent) -0.5347 -# Parameters 9.52M 5.08M +# Final valid prob (xent) -0.5310 +# Parameters 9.52M 4.36M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d -# exp/chain/cnn_e2eali_1d: num-iters=24 nj=3..5 num-params=5.1M dim=40->400 combine=-0.054->-0.054 (over 1) xent:train/valid[15,23,final]=(-0.727,-0.497,-0.470/-0.734,-0.557,-0.535) logprob:train/valid[15,23,final]=(-0.093,-0.057,-0.050/-0.110,-0.078,-0.072) +# exp/chain/cnn_e2eali_1d: num-iters=30 nj=3..5 num-params=4.4M dim=40->400 combine=-0.055->-0.055 (over 1) xent:train/valid[19,29,final]=(-0.683,-0.489,-0.469/-0.703,-0.544,-0.531) logprob:train/valid[19,29,final]=(-0.090,-0.057,-0.052/-0.107,-0.076,-0.071) set -e -o pipefail stage=0 @@ -150,7 +150,8 @@ if [ $stage -le 4 ]; then conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 - relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 @@ -194,8 +195,8 @@ if [ $stage -le 5 ]; then --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=6 \ - --trainer.frames-per-iter=2000000 \ + --trainer.num-epochs=5 \ + --trainer.frames-per-iter=1500000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=5 \ --trainer.dropout-schedule $dropout_schedule \ From 17c506b68e9c4629b55eba3bd4cea8acbdfeedaf Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 2018 00:58:39 -0400 Subject: [PATCH 23/37] adding data preprocessing in test and val --- egs/iam/v2/local/augment_data.sh | 8 ++++---- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh | 5 +++-- egs/iam/v2/run_end2end.sh | 11 +++++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/egs/iam/v2/local/augment_data.sh b/egs/iam/v2/local/augment_data.sh index 82fa5230a43..31e4a8217ca 100755 --- a/egs/iam/v2/local/augment_data.sh +++ b/egs/iam/v2/local/augment_data.sh @@ -17,11 +17,11 @@ echo "$0 $@" srcdir=$1 outdir=$2 datadir=$3 - +aug_set=aug1 mkdir -p $datadir/augmentations -echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp" +echo "copying $srcdir to $datadir/augmentations/$aug_set, allowed length, creating feats.scp" -for set in aug1; do +for set in $aug_set; do 
image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ $srcdir $datadir/augmentations/$set cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt @@ -30,5 +30,5 @@ for set in aug1; do done echo " combine original data and data from different augmentations" -utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1 +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/$aug_set cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index d1930efefd7..c1e98de68b4 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -21,6 +21,7 @@ stage=0 nj=30 train_set=train +test_dir=data/test nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1b @@ -243,10 +244,10 @@ if [ $stage -le 7 ]; then --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + $dir/graph $test_dir $dir/decode_test || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 + $test_dir $dir/decode_test{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index cfc4653f24b..ed6ae935401 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -52,12 +52,15 @@ if [ $stage -le 2 ]; then # which will be used by local/make_features.py to enforce the images to # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in train test; do - echo "$(date) Extracting features, creating feats.scp file" - local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} + echo "$(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train + steps/compute_cmvn_stats.sh data/train || exit 1; + for set in val test; do + local/extract_features.sh --nj $nj --cmd "$cmd" --augment true \ + --feat-dim 40 data/${set} steps/compute_cmvn_stats.sh data/${set} || exit 1; done - image/fix_data_dir.sh data/train + utils/fix_data_dir.sh data/train fi if [ $stage -le 3 ]; then From d6407429bd4006db34efa7b8d032215c791f9f0c Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 2018 01:01:06 -0400 Subject: [PATCH 24/37] updating results --- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index c1e98de68b4..2251595bec0 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -3,10 +3,10 @@ # This script does end2end chain training (i.e. 
from scratch) # local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d # System e2e_cnn_1b cnn_e2eali_1d -# WER 13.91 9.32 -# WER (rescored) 13.64 9.07 -# CER 7.08 4.35 -# CER (rescored) 6.82 4.24 +# WER 13.91 8.80 +# WER (rescored) 13.64 8.52 +# CER 7.08 4.06 +# CER (rescored) 6.82 3.98 # Final train prob 0.0148 -0.0524 # Final valid prob 0.0105 -0.0713 # Final train prob (xent) -0.4695 From 94a80ade3373fd1e17c43011a95048b786258ca0 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 2018 01:17:31 -0400 Subject: [PATCH 25/37] replacing prepend words with common prepend words --- egs/iam/v2/local/prepend_words.py | 13 ------------- egs/iam/v2/local/train_lm.sh | 6 +++--- egs/iam/v2/run_end2end.sh | 4 ++-- 3 files changed, 5 insertions(+), 18 deletions(-) delete mode 100755 egs/iam/v2/local/prepend_words.py diff --git a/egs/iam/v2/local/prepend_words.py b/egs/iam/v2/local/prepend_words.py deleted file mode 100755 index d53eb8974bf..00000000000 --- a/egs/iam/v2/local/prepend_words.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# This script, prepend '|' to every words in the transcript to mark -# the beginning of the words for finding the initial-space of every word -# after decoding. - -import sys, io - -infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') -output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') -for line in infile: - output.write(' '.join(["|" + word for word in line.split()]) + '\n') diff --git a/egs/iam/v2/local/train_lm.sh b/egs/iam/v2/local/train_lm.sh index 35eb56b1341..ff674c4de22 100755 --- a/egs/iam/v2/local/train_lm.sh +++ b/egs/iam/v2/local/train_lm.sh @@ -64,14 +64,14 @@ if [ $stage -le 0 ]; then > data/local/lob-train-only.txt fi cat data/local/lob-train-only.txt | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > ${dir}/data/text/lob.txt cat data/local/browncorpus/brown.txt | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > ${dir}/data/text/brown.txt if [ -d "data/local/wellingtoncorpus" ]; then cat data/local/wellingtoncorpus/Wellington_annotation_removed.txt | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > ${dir}/data/text/wellington.txt fi diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index ed6ae935401..d1de8c5c6c2 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -93,12 +93,12 @@ END cut -d' ' -f2- data/train/text > data/local/train_data.txt cat data/local/phones.txt data/local/train_data.txt | \ - local/prepend_words.py | \ + utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt for set in test train val train_aug; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text From 711c3c9739a5ec2a5c0da1a4843827f9c2bb54b2 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 
2018 03:04:57 -0400 Subject: [PATCH 26/37] updating remove_test_utterances_from_lob for aachen split --- .../local/remove_test_utterances_from_lob.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/egs/iam/v2/local/remove_test_utterances_from_lob.py b/egs/iam/v2/local/remove_test_utterances_from_lob.py index 1b414ef47f6..bf3c72e1e2e 100755 --- a/egs/iam/v2/local/remove_test_utterances_from_lob.py +++ b/egs/iam/v2/local/remove_test_utterances_from_lob.py @@ -89,22 +89,26 @@ def read_utterances(text_file_path): remaining_utterances = dict() for line_id, line_to_find in utterance_dict.items(): found_line = False - for i in range(1, (len(corpus_text_lowercase_wo_sc) - 2)): - # Combine 3 consecutive lines of the corpus into a single line - prev_words = corpus_text_lowercase_wo_sc[i - 1].strip() - curr_words = corpus_text_lowercase_wo_sc[i].strip() - next_words = corpus_text_lowercase_wo_sc[i + 1].strip() - new_line = prev_words + curr_words + next_words - transcript = ''.join(new_line) - if line_to_find in transcript: - found_line = True - row_to_keep[i-1] = False - row_to_keep[i] = False - row_to_keep[i+1] = False + # avoiding very small utterance, it causes removing + # complete lob text + if len(line_to_find) < 13: + remaining_utterances[line_id] = line_to_find + else: + for i in range(1, (len(corpus_text_lowercase_wo_sc) - 2)): + # Combine 3 consecutive lines of the corpus into a single line + prev_words = corpus_text_lowercase_wo_sc[i - 1].strip() + curr_words = corpus_text_lowercase_wo_sc[i].strip() + next_words = corpus_text_lowercase_wo_sc[i + 1].strip() + new_line = prev_words + curr_words + next_words + transcript = ''.join(new_line) + if line_to_find in transcript: + found_line = True + row_to_keep[i-1] = False + row_to_keep[i] = False + row_to_keep[i+1] = False if not found_line: remaining_utterances[line_id] = line_to_find - for i in range(len(original_corpus_text)): transcript = original_corpus_text[i].strip() if row_to_keep[i]: From 5f2d96066de84b4edb9161f06578e9fc694a8e2e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 2018 13:28:21 -0400 Subject: [PATCH 27/37] removing data/val/text from train_lm --- egs/iam/v2/local/train_lm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/iam/v2/local/train_lm.sh b/egs/iam/v2/local/train_lm.sh index ff674c4de22..cc0119eb748 100755 --- a/egs/iam/v2/local/train_lm.sh +++ b/egs/iam/v2/local/train_lm.sh @@ -68,7 +68,8 @@ if [ $stage -le 0 ]; then | sed 's/@@//g' > ${dir}/data/text/lob.txt cat data/local/browncorpus/brown.txt | \ utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > ${dir}/data/text/brown.txt + | sed 's/@@//g' > ${dir}/brown.txt + tail -n +5000 ${dir}/brown.txt > ${dir}/data/text/brown.txt if [ -d "data/local/wellingtoncorpus" ]; then cat data/local/wellingtoncorpus/Wellington_annotation_removed.txt | \ utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ @@ -78,8 +79,7 @@ if [ $stage -le 0 ]; then # use the validation data as the dev set. # Note: the name 'dev' is treated specially by pocolm, it automatically # becomes the dev set. - - cat data/val/text | cut -d " " -f 2- > ${dir}/data/text/dev.txt + head -5000 ${dir}/brown.txt > ${dir}/data/text/dev.txt # use the training data as an additional data source. # we can later fold the dev data into this. 
From 7f2ad0ba4b4b33c6b9cd43d2a31ce7672b04d5db Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 2018 14:32:34 -0400 Subject: [PATCH 28/37] cosmetic fixes in unk arc decoding --- .../v1/local/unk_arc_post_to_transcription.py | 137 +++++++++-------- .../v1/local/unk_arc_post_to_transcription.py | 141 ++++++++++-------- 2 files changed, 159 insertions(+), 119 deletions(-) diff --git a/egs/iam/v1/local/unk_arc_post_to_transcription.py b/egs/iam/v1/local/unk_arc_post_to_transcription.py index c5ad1235427..f8b69820601 100755 --- a/egs/iam/v1/local/unk_arc_post_to_transcription.py +++ b/egs/iam/v1/local/unk_arc_post_to_transcription.py @@ -1,88 +1,107 @@ #!/usr/bin/env python3 -# Copyright 2017 Ashish Arora +#Copyright 2017 Ashish Arora +""" This module will be used by scripts for open vocabulary setup. + If the hypothesis transcription contains , then it will replace the + with the word predicted by model by concatenating phones decoded + from the unk-model. It is currently supported only for triphone setup. + Args: + phones: File name of a file that contains the phones.txt, (symbol-table for phones). + phone and phoneID, Eg. a 217, phoneID of 'a' is 217. + words: File name of a file that contains the words.txt, (symbol-table for words). + word and wordID. Eg. ACCOUNTANCY 234, wordID of 'ACCOUNTANCY' is 234. + unk: ID of . Eg. 231. + one-best-arc-post: A file in arc-post format, which is a list of timing info and posterior + of arcs along the one-best path from the lattice. + E.g. 506_m01-049-00 8 12 1 7722 282 272 288 231 + [] + [ ...] + output-text: File containing hypothesis transcription with recognized by the + unk-model. + E.g. A move to stop mr. gaitskell. + + Eg. local/unk_arc_post_to_transcription.py lang/phones.txt lang/words.txt + data/lang/oov.int +""" import argparse +import os import sys - parser = argparse.ArgumentParser(description="""uses phones to convert unk to word""") -parser.add_argument('phones', type=str, help='phones and phonesID') -parser.add_argument('words', type=str, help='word and wordID') -parser.add_argument('unk', type=str, default='-', help='location of unk file') -parser.add_argument('--input-ark', type=str, default='-', help='where to read the input data') -parser.add_argument('--out-ark', type=str, default='-', help='where to write the output data') +parser.add_argument('phones', type=str, help='File name of a file that contains the' + 'symbol-table for phones. Each line must be: ') +parser.add_argument('words', type=str, help='File name of a file that contains the' + 'symbol-table for words. Each line must be: ') +parser.add_argument('unk', type=str, default='-', help='File name of a file that' + 'contains the ID of . The content must be: , e.g. 
231') +parser.add_argument('--one-best-arc-post', type=str, default='-', help='A file in arc-post' + 'format, which is a list of timing info and posterior of arcs' + 'along the one-best path from the lattice') +parser.add_argument('--output-text', type=str, default='-', help='File containing' + 'hypothesis transcription with recognized by the unk-model') args = parser.parse_args() - ### main ### -phone_fh = open(args.phones, 'r', encoding='latin-1') -word_fh = open(args.words, 'r', encoding='latin-1') -unk_fh = open(args.unk, 'r', encoding='latin-1') -if args.input_ark == '-': - input_fh = sys.stdin +phone_handle = open(args.phones, 'r', encoding='latin-1') # Create file handles +word_handle = open(args.words, 'r', encoding='latin-1') +unk_handle = open(args.unk,'r', encoding='latin-1') +if args.one_best_arc_post == '-': + arc_post_handle = sys.stdin else: - input_fh = open(args.input_ark, 'r', encoding='latin-1') -if args.out_ark == '-': - out_fh = sys.stdout + arc_post_handle = open(args.one_best_arc_post, 'r', encoding='latin-1') +if args.output_text == '-': + output_text_handle = sys.stdout else: - out_fh = open(args.out_ark, 'w', encoding='latin-1') + output_text_handle = open(args.output_text, 'w', encoding='latin-1') -phone_dict = dict() # Stores phoneID and phone mapping -phone_data_vect = phone_fh.read().strip().split("\n") -for key_val in phone_data_vect: +id2phone = dict() # Stores the mapping from phone_id (int) to phone (char) +phones_data = phone_handle.read().strip().split("\n") + +for key_val in phones_data: key_val = key_val.split(" ") - phone_dict[key_val[1]] = key_val[0] + id2phone[key_val[1]] = key_val[0] + word_dict = dict() -word_data_vect = word_fh.read().strip().split("\n") +word_data_vect = word_handle.read().strip().split("\n") + for key_val in word_data_vect: key_val = key_val.split(" ") word_dict[key_val[1]] = key_val[0] -unk_val = unk_fh.read().strip().split(" ")[0] +unk_val = unk_handle.read().strip().split(" ")[0] -utt_word_dict = dict() -utt_phone_dict = dict() # Stores utteranceID and phoneID -unk_word_dict = dict() -count=0 -for line in input_fh: +utt_word_dict = dict() # Dict of list, stores mapping from utteranceID(int) to words(str) +for line in arc_post_handle: line_vect = line.strip().split("\t") - if len(line_vect) < 6: - print("Bad line: '{}' Expecting 6 fields. Skipping...".format(line), + if len(line_vect) < 6: # Check for 1best-arc-post output + print("Error: Bad line: '{}' Expecting 6 fields. Skipping...".format(line), file=sys.stderr) continue - uttID = line_vect[0] + utt_id = line_vect[0] word = line_vect[4] phones = line_vect[5] - if uttID in utt_word_dict.keys(): - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - else: - count = 0 - utt_word_dict[uttID] = dict() - utt_phone_dict[uttID] = dict() - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - if word == unk_val: # Get character sequence for unk - phone_key_vect = phones.split(" ") - phone_val_vect = list() - for pkey in phone_key_vect: - phone_val_vect.append(phone_dict[pkey]) + if utt_id not in list(utt_word_dict.keys()): + utt_word_dict[utt_id] = list() + + if word == unk_val: # Get the 1best phone sequence given by the unk-model + phone_id_seq = phones.split(" ") + phone_seq = list() + for pkey in phone_id_seq: + phone_seq.append(id2phone[pkey]) # Convert the phone-id sequence to a phone sequence. 
phone_2_word = list() - for phone_val in phone_val_vect: - phone_2_word.append(phone_val.split('_')[0]) - phone_2_word = ''.join(phone_2_word) - utt_word_dict[uttID][count] = phone_2_word + for phone_val in phone_seq: + phone_2_word.append(phone_val.split('_')[0]) # Removing the world-position markers(e.g. _B) + phone_2_word = ''.join(phone_2_word) # Concatnate phone sequence + utt_word_dict[utt_id].append(phone_2_word) # Store word from unk-model else: - if word == '0': + if word == '0': # Store space/silence word_val = ' ' else: word_val = word_dict[word] - utt_word_dict[uttID][count] = word_val - count += 1 + utt_word_dict[utt_id].append(word_val) # Store word from 1best-arc-post -transcription = "" -for key in sorted(utt_word_dict.keys()): - transcription = key - for index in sorted(utt_word_dict[key].keys()): - value = utt_word_dict[key][index] - transcription = transcription + " " + value - out_fh.write(transcription + '\n') +transcription = "" # Output transcription +for utt_key in sorted(utt_word_dict.keys()): + transcription = utt_key + for word in utt_word_dict[utt_key]: + transcription = transcription + " " + word + output_text_handle.write(transcription + '\n') diff --git a/egs/uw3/v1/local/unk_arc_post_to_transcription.py b/egs/uw3/v1/local/unk_arc_post_to_transcription.py index c86d35e4b8a..f8b69820601 100755 --- a/egs/uw3/v1/local/unk_arc_post_to_transcription.py +++ b/egs/uw3/v1/local/unk_arc_post_to_transcription.py @@ -1,86 +1,107 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -# Copyright 2017 Ashish Arora +#Copyright 2017 Ashish Arora +""" This module will be used by scripts for open vocabulary setup. + If the hypothesis transcription contains , then it will replace the + with the word predicted by model by concatenating phones decoded + from the unk-model. It is currently supported only for triphone setup. + Args: + phones: File name of a file that contains the phones.txt, (symbol-table for phones). + phone and phoneID, Eg. a 217, phoneID of 'a' is 217. + words: File name of a file that contains the words.txt, (symbol-table for words). + word and wordID. Eg. ACCOUNTANCY 234, wordID of 'ACCOUNTANCY' is 234. + unk: ID of . Eg. 231. + one-best-arc-post: A file in arc-post format, which is a list of timing info and posterior + of arcs along the one-best path from the lattice. + E.g. 506_m01-049-00 8 12 1 7722 282 272 288 231 + [] + [ ...] + output-text: File containing hypothesis transcription with recognized by the + unk-model. + E.g. A move to stop mr. gaitskell. + + Eg. local/unk_arc_post_to_transcription.py lang/phones.txt lang/words.txt + data/lang/oov.int +""" import argparse +import os import sys - parser = argparse.ArgumentParser(description="""uses phones to convert unk to word""") -parser.add_argument('phones', type=str, help='phones and phonesID') -parser.add_argument('words', type=str, help='word and wordID') -parser.add_argument('unk', type=str, default='-', help='location of unk file') -parser.add_argument('--input-ark', type=str, default='-', help='where to read the input data') -parser.add_argument('--out-ark', type=str, default='-', help='where to write the output data') +parser.add_argument('phones', type=str, help='File name of a file that contains the' + 'symbol-table for phones. Each line must be: ') +parser.add_argument('words', type=str, help='File name of a file that contains the' + 'symbol-table for words. Each line must be: ') +parser.add_argument('unk', type=str, default='-', help='File name of a file that' + 'contains the ID of . 
The content must be: , e.g. 231') +parser.add_argument('--one-best-arc-post', type=str, default='-', help='A file in arc-post' + 'format, which is a list of timing info and posterior of arcs' + 'along the one-best path from the lattice') +parser.add_argument('--output-text', type=str, default='-', help='File containing' + 'hypothesis transcription with recognized by the unk-model') args = parser.parse_args() + ### main ### -phone_fh = open(args.phones, 'r') -word_fh = open(args.words, 'r') -unk_fh = open(args.unk,'r') -if args.input_ark == '-': - input_fh = sys.stdin +phone_handle = open(args.phones, 'r', encoding='latin-1') # Create file handles +word_handle = open(args.words, 'r', encoding='latin-1') +unk_handle = open(args.unk,'r', encoding='latin-1') +if args.one_best_arc_post == '-': + arc_post_handle = sys.stdin else: - input_fh = open(args.input_ark,'r') -if args.out_ark == '-': - out_fh = sys.stdout + arc_post_handle = open(args.one_best_arc_post, 'r', encoding='latin-1') +if args.output_text == '-': + output_text_handle = sys.stdout else: - out_fh = open(args.out_ark,'wb') + output_text_handle = open(args.output_text, 'w', encoding='latin-1') -phone_dict = dict()# stores phoneID and phone mapping -phone_data_vect = phone_fh.read().strip().split("\n") -for key_val in phone_data_vect: +id2phone = dict() # Stores the mapping from phone_id (int) to phone (char) +phones_data = phone_handle.read().strip().split("\n") + +for key_val in phones_data: key_val = key_val.split(" ") - phone_dict[key_val[1]] = key_val[0] + id2phone[key_val[1]] = key_val[0] + word_dict = dict() -word_data_vect = word_fh.read().strip().split("\n") +word_data_vect = word_handle.read().strip().split("\n") + for key_val in word_data_vect: key_val = key_val.split(" ") word_dict[key_val[1]] = key_val[0] -unk_val = unk_fh.read().strip().split(" ")[0] +unk_val = unk_handle.read().strip().split(" ")[0] -utt_word_dict = dict() -utt_phone_dict = dict()# stores utteranceID and phoneID -unk_word_dict = dict() -count=0 -for line in input_fh: +utt_word_dict = dict() # Dict of list, stores mapping from utteranceID(int) to words(str) +for line in arc_post_handle: line_vect = line.strip().split("\t") - if len(line_vect) < 6: - print "IndexError" - print line_vect + if len(line_vect) < 6: # Check for 1best-arc-post output + print("Error: Bad line: '{}' Expecting 6 fields. Skipping...".format(line), + file=sys.stderr) continue - uttID = line_vect[0] + utt_id = line_vect[0] word = line_vect[4] phones = line_vect[5] - if uttID in utt_word_dict.keys(): - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - else: - count = 0 - utt_word_dict[uttID] = dict() - utt_phone_dict[uttID] = dict() - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - if word == unk_val: # get character sequence for unk - phone_key_vect = phones.split(" ") - phone_val_vect = list() - for pkey in phone_key_vect: - phone_val_vect.append(phone_dict[pkey]) + if utt_id not in list(utt_word_dict.keys()): + utt_word_dict[utt_id] = list() + + if word == unk_val: # Get the 1best phone sequence given by the unk-model + phone_id_seq = phones.split(" ") + phone_seq = list() + for pkey in phone_id_seq: + phone_seq.append(id2phone[pkey]) # Convert the phone-id sequence to a phone sequence. 
phone_2_word = list() - for phone_val in phone_val_vect: - phone_2_word.append(phone_val.split('_')[0]) - phone_2_word = ''.join(phone_2_word) - utt_word_dict[uttID][count] = phone_2_word + for phone_val in phone_seq: + phone_2_word.append(phone_val.split('_')[0]) # Removing the world-position markers(e.g. _B) + phone_2_word = ''.join(phone_2_word) # Concatnate phone sequence + utt_word_dict[utt_id].append(phone_2_word) # Store word from unk-model else: - if word == '0': + if word == '0': # Store space/silence word_val = ' ' else: word_val = word_dict[word] - utt_word_dict[uttID][count] = word_val - count += 1 + utt_word_dict[utt_id].append(word_val) # Store word from 1best-arc-post -transcription = "" -for key in sorted(utt_word_dict.iterkeys()): - transcription = key - for index in sorted(utt_word_dict[key].iterkeys()): - value = utt_word_dict[key][index] - transcription = transcription + " " + value - out_fh.write(transcription + '\n') +transcription = "" # Output transcription +for utt_key in sorted(utt_word_dict.keys()): + transcription = utt_key + for word in utt_word_dict[utt_key]: + transcription = transcription + " " + word + output_text_handle.write(transcription + '\n') From 8f2ac25d50f7ee677987c5c8cf397435ef6e246a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 7 Sep 2018 16:59:49 -0400 Subject: [PATCH 29/37] adding val data for decoding --- egs/iam/v2/local/chain/compare_wer.sh | 30 +++++++++++++++++++ .../local/chain/tuning/run_cnn_e2eali_1a.sh | 19 ++++++++++++ .../local/chain/tuning/run_cnn_e2eali_1b.sh | 19 ++++++++++++ .../local/chain/tuning/run_cnn_e2eali_1c.sh | 19 ++++++++++++ .../local/chain/tuning/run_cnn_e2eali_1d.sh | 21 +++++++++++-- .../v2/local/chain/tuning/run_e2e_cnn_1a.sh | 14 ++++++++- .../v2/local/chain/tuning/run_e2e_cnn_1b.sh | 14 ++++++++- 7 files changed, 131 insertions(+), 5 deletions(-) diff --git a/egs/iam/v2/local/chain/compare_wer.sh b/egs/iam/v2/local/chain/compare_wer.sh index d4076457463..1488981a348 100755 --- a/egs/iam/v2/local/chain/compare_wer.sh +++ b/egs/iam/v2/local/chain/compare_wer.sh @@ -50,6 +50,36 @@ for x in $*; do done echo +echo -n "# WER val " +for x in $*; do + wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) val " +for x in $*; do + wer="--" + [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER val " +for x in $*; do + cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) val " +for x in $*; do + cer="--" + [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + if $used_epochs; then exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh index ba28f681708..a5672417aff 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -22,6 +22,7 @@ stage=0 nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -243,3 +244,21 @@ if [ $stage -le 7 ]; then --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; fi + +if [ $stage -le 8 ] && $decode_val; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/val $dir/decode_val || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/val $dir/decode_val{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh index 298e7053086..ea27386164d 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -23,6 +23,7 @@ stage=0 nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -249,3 +250,21 @@ if [ $stage -le 7 ]; then steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi + +if [ $stage -le 8 ] && $decode_val; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/val $dir/decode_val || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/val $dir/decode_val{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh index 48e0a76dead..6411a300a12 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -25,6 +25,7 @@ stage=0 nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -251,3 +252,21 @@ if [ $stage -le 7 ]; then steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi + +if [ $stage -le 8 ] && $decode_val; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/val $dir/decode_val || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/val $dir/decode_val{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). 
Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 2251595bec0..a7d3af59038 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -21,7 +21,7 @@ stage=0 nj=30 train_set=train -test_dir=data/test +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1b @@ -244,10 +244,25 @@ if [ $stage -le 7 ]; then --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ - $dir/graph $test_dir $dir/decode_test || exit 1; + $dir/graph data/test $dir/decode_test || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - $test_dir $dir/decode_test{,_rescored} || exit 1 + data/test $dir/decode_test{,_rescored} || exit 1 +fi + +if [ $stage -le 8 ] && $decode_val; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/val $dir/decode_val || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/val $dir/decode_val{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh index d88e1a38820..078e4e2255c 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh @@ -25,6 +25,7 @@ stage=0 train_stage=-10 get_egs_stage=-10 affix=1a +nj=30 # training options tdnn_dim=450 @@ -37,6 +38,7 @@ l2_regularize=0.00005 frames_per_iter=1000000 cmvn_opts="--norm-means=true --norm-vars=true" train_set=train +decode_val=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g @@ -163,12 +165,22 @@ fi if [ $stage -le 5 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 30 --cmd "$cmd" \ + --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi +if [ $stage -le 6 ] && $decode_val; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/val $dir/decode_val || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/val $dir/decode_val{,_rescored} || exit 1 +fi + echo "Done. Date: $(date). 
Results:" local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh index 7fb81c97ea7..db225263ef5 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -23,12 +23,14 @@ stage=0 train_stage=-10 get_egs_stage=-10 affix=1b +nj=30 # training options tdnn_dim=450 minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 common_egs_dir= train_set=train +decode_val=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g @@ -149,12 +151,22 @@ fi if [ $stage -le 5 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 30 --cmd "$cmd" \ + --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ data/test $dir/decode_test{,_rescored} || exit 1 fi +if [ $stage -le 6 ] && $decode_val; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/val $dir/decode_val || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/val $dir/decode_val{,_rescored} || exit 1 +fi + echo "Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir From b8e71b29e70bd1369705357e1d20afb3a6c02da7 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 10 Sep 2018 02:54:48 -0400 Subject: [PATCH 30/37] modification from the review --- egs/iam/v2/local/chain/compare_wer.sh | 12 +++--- .../local/chain/tuning/run_cnn_e2eali_1a.sh | 42 ++++++++----------- .../local/chain/tuning/run_cnn_e2eali_1b.sh | 40 +++++++----------- .../local/chain/tuning/run_cnn_e2eali_1c.sh | 41 +++++++----------- .../local/chain/tuning/run_cnn_e2eali_1d.sh | 40 +++++++----------- .../v2/local/chain/tuning/run_e2e_cnn_1a.sh | 25 ++++------- .../v2/local/chain/tuning/run_e2e_cnn_1b.sh | 27 ++++-------- 7 files changed, 82 insertions(+), 145 deletions(-) diff --git a/egs/iam/v2/local/chain/compare_wer.sh b/egs/iam/v2/local/chain/compare_wer.sh index 1488981a348..2ce14e13694 100755 --- a/egs/iam/v2/local/chain/compare_wer.sh +++ b/egs/iam/v2/local/chain/compare_wer.sh @@ -50,32 +50,32 @@ for x in $*; do done echo -echo -n "# WER val " +echo -n "# WER val " for x in $*; do wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') printf "% 10s" $wer done echo -echo -n "# WER (rescored) val " +echo -n "# WER (rescored) val " for x in $*; do wer="--" - [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_val_rescored/scoring_kaldi/best_wer | awk '{print $2}') printf "% 10s" $wer done echo -echo -n "# CER val " +echo -n "# CER val " for x in $*; do cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') printf "% 10s" $cer done echo -echo -n "# CER (rescored) val " +echo -n "# CER (rescored) val " for x in $*; do cer="--" - [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_val_rescored/scoring_kaldi/best_cer | awk '{print $2}') printf "% 10s" $cer done echo diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh index a5672417aff..c39f4bfe9e3 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh +++ 
b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -43,7 +43,9 @@ tdnn_dim=450 # training options srand=0 remove_egs=true -lang_test=lang_unk +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -229,35 +231,25 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; -fi - -if [ $stage -le 8 ] && $decode_val; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/val $dir/decode_val || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/val $dir/decode_val{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + done fi echo "Done. Date: $(date). Results:" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh index ea27386164d..cc4fd84a85a 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -46,6 +46,7 @@ srand=0 remove_egs=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. 
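# A minimal sketch (not part of the patch) of the $maybe_val idiom introduced
# above: when decode_val=false the variable expands to nothing, word splitting
# drops it, and the decoding loop only visits "test"; the echo stands in for
# the real decode/rescore commands.
#
#   decode_val=true
#   if $decode_val; then maybe_val=val; else maybe_val= ; fi
#   for decode_set in test $maybe_val; do
#     echo "would decode data/$decode_set into decode_${decode_set}"
#   done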
echo "$0 $@" # Print the command line for logging @@ -238,32 +239,19 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -if [ $stage -le 8 ] && $decode_val; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/val $dir/decode_val || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/val $dir/decode_val{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + done fi echo "Done. Date: $(date). Results:" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh index 6411a300a12..752cb76c21b 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -48,7 +48,7 @@ srand=0 remove_egs=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g - +if $decode_val; then maybe_val=val; else maybe_val= ; fi dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. 
echo "$0 $@" # Print the command line for logging @@ -240,32 +240,19 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -if [ $stage -le 8 ] && $decode_val; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/val $dir/decode_val || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/val $dir/decode_val{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + done fi echo "Done. Date: $(date). Results:" diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index a7d3af59038..45712b76499 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -44,7 +44,7 @@ srand=0 remove_egs=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g - +if $decode_val; then maybe_val=val; else maybe_val= ; fi dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. 
echo "$0 $@" # Print the command line for logging @@ -237,33 +237,21 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + done fi -if [ $stage -le 8 ] && $decode_val; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/val $dir/decode_val || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/val $dir/decode_val{,_rescored} || exit 1 -fi echo "Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh index 078e4e2255c..4eb3e4bdff1 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh @@ -163,23 +163,14 @@ if [ $stage -le 4 ]; then fi if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -if [ $stage -le 6 ] && $decode_val; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/val $dir/decode_val || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/val $dir/decode_val{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + done fi echo "Done. Date: $(date). 
Results:" diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh index db225263ef5..495d5076cfc 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -33,7 +33,7 @@ train_set=train decode_val=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g - +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -149,23 +149,14 @@ if [ $stage -le 4 ]; then fi if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -if [ $stage -le 6 ] && $decode_val; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/val $dir/decode_val || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/val $dir/decode_val{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + done fi echo "Done. Date: $(date). Results:" From e9a75f6cfaffdcba4d9f372944ed3ddbf21a2747 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 10 Sep 2018 03:03:38 -0400 Subject: [PATCH 31/37] modification from review --- egs/iam/v2/run_end2end.sh | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index d1de8c5c6c2..c515c85fc72 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -29,14 +29,13 @@ wellington_database=/export/corpora5/Wellington/WWC/ ./local/check_tools.sh if [ $stage -le 0 ]; then + if [ -f data/train/text ] && ! $overwrite; then - echo "Not processing, probably script have run from wrong stage" + echo "$0: Not processing, probably script have run from wrong stage" echo "Exiting with status 1 to avoid data corruption" exit 1; fi -fi -if [ $stage -le 1 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ @@ -45,8 +44,8 @@ if [ $stage -le 1 ]; then fi mkdir -p data/{train,test}/data -if [ $stage -le 2 ]; then - echo "$(date) stage 2: getting allowed image widths for e2e training..." +if [ $stage -le 1 ]; then + echo "$(date) stage 1: getting allowed image widths for e2e training..." 
image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to @@ -63,15 +62,15 @@ if [ $stage -le 2 ]; then utils/fix_data_dir.sh data/train fi -if [ $stage -le 3 ]; then +if [ $stage -le 2 ]; then for set in train; do - echo "$(date) stage 3: Performing augmentation, it will double training data" + echo "$(date) stage 2: Performing augmentation, it will double training data" local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done fi -if [ $stage -le 4 ]; then +if [ $stage -le 3 ]; then echo "$0: Preparing BPE..." # getting non-silence phones. cut -d' ' -f2- data/train/text | \ @@ -105,12 +104,12 @@ END done fi -if [ $stage -le 5 ]; then +if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh fi -if [ $stage -le 6 ]; then +if [ $stage -le 5 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. @@ -128,12 +127,12 @@ if [ $stage -le 6 ]; then data/lang data/lang_rescore_6g fi -if [ $stage -le 7 ]; then +if [ $stage -le 6 ]; then echo "$0: Calling the flat-start chain recipe..." local/chain/run_e2e_cnn.sh --train_set train_aug fi -if [ $stage -le 8 ]; then +if [ $stage -le 7 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ @@ -141,7 +140,7 @@ if [ $stage -le 8 ]; then data/train_aug data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train fi -if [ $stage -le 9 ]; then +if [ $stage -le 8 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments..." local/chain/run_cnn_e2eali.sh --train_set train_aug fi From ae674ed137e8e4652368b9875855531eea11b0a7 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 10 Sep 2018 03:14:56 -0400 Subject: [PATCH 32/37] modification from review --- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 45712b76499..aeafce2baf4 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -1,5 +1,8 @@ #!/bin/bash +# e2eali_1c is the same as e2eali_1c but has more CNN layers, different filter size +# smaller lm-opts, less epochs, more initial/finaljobs, less minibatch, frams-per-iter. + # This script does end2end chain training (i.e. 
from scratch) # local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d # System e2e_cnn_1b cnn_e2eali_1d From 7651f37607287ffa896049f716ce3ef470bc10f7 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 10 Sep 2018 19:00:52 -0400 Subject: [PATCH 33/37] modification for downloading aachen splits --- egs/iam/v2/local/prepare_data.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh index 8c2505601cc..abfc5327149 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -54,6 +54,8 @@ ascii_url=http://www.fki.inf.unibe.ch/DBs/iamDB/data/ascii/ascii.tgz brown_corpus_url=http://www.sls.hawaii.edu/bley-vroman/brown.txt lob_corpus_url=http://ota.ox.ac.uk/text/0167.zip wellington_corpus_loc=/export/corpora5/Wellington/WWC/ +aachen_split_url=http://www.openslr.org/resources/56/splits.zip +aachen_splits=data/local/aachensplits mkdir -p $download_dir data/local # download and extact images and transcription @@ -145,6 +147,19 @@ else echo "$0: Wellington Corpus not included because wellington_dir not provided" fi +if [ -d $aachen_splits ]; then + echo "$0: Not downloading the Aachen splits as it is already there." +else + if [ ! -f $aachen_splits/splits.zip ]; then + echo "$0: Downloading Aachen splits ..." + mkdir -p $aachen_splits + wget -P $aachen_splits/ $aachen_split_url || exit 1; + fi + unzip $aachen_splits/splits.zip -d $aachen_splits || exit 1; + echo "$0: Done downloading and extracting Aachen splits" +fi + + mkdir -p data/{train,test,val} file_name=largeWriterIndependentTextLineRecognitionTask From 417d97cef6bc1743485775786cd40be39e7f2cba Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 10 Sep 2018 22:03:30 -0400 Subject: [PATCH 34/37] fixing bug in rescoring --- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh | 2 +- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh | 2 +- egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh | 2 +- egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh | 2 +- egs/iam/v2/local/prepare_data.sh | 6 +++--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh index c39f4bfe9e3..a80bb02290b 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -248,7 +248,7 @@ if [ $stage -le 7 ]; then $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 done fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh index cc4fd84a85a..6615c4669d6 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -250,7 +250,7 @@ if [ $stage -le 7 ]; then $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 done fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh 
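# Why the decode_${decode_set}{,_rescored} fix matters (illustrative sketch,
# not part of the patch): bash performs brace expansion before parameter
# expansion, so the old decode_$decode_set{,_rescored} produced a second word
# that references the unset variable $decode_set_rescored and collapses to
# "$dir/decode_".  The literal decode_test{,_rescored} used before the loop
# refactor was safe only because no variable was involved.
#
#   decode_set=test
#   echo decode_$decode_set{,_rescored}     # -> decode_test decode_
#   echo decode_${decode_set}{,_rescored}   # -> decode_test decode_test_rescored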
index 752cb76c21b..f44c073635e 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -251,7 +251,7 @@ if [ $stage -le 7 ]; then $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 done fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index aeafce2baf4..7395781dd96 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -251,7 +251,7 @@ if [ $stage -le 7 ]; then $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 done fi diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh index 4eb3e4bdff1..cb2bfa0a82d 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh @@ -169,7 +169,7 @@ if [ $stage -le 5 ]; then $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 done fi diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh index 495d5076cfc..d5f79602695 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -155,7 +155,7 @@ if [ $stage -le 5 ]; then $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_$decode_set{,_rescored} || exit 1 + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 done fi diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh index abfc5327149..cf729d9a939 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -177,9 +177,9 @@ cat $test_old > $test_new cat $val1_old $val2_old > $val_new if $process_aachen_split; then - local/process_aachen_splits.py data/local extra/splits data/train --dataset train || exit 1 - local/process_aachen_splits.py data/local extra/splits data/test --dataset test || exit 1 - local/process_aachen_splits.py data/local extra/splits data/val --dataset validation || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/train --dataset train || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/test --dataset test || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/val --dataset validation || exit 1 else local/process_data.py data/local data/train --dataset train || exit 1 local/process_data.py data/local data/test --dataset test || exit 1 From 6a865314e863b831e0a20709052b46fd49cf3173 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 11 Sep 2018 23:43:29 -0400 Subject: [PATCH 35/37] hardcoding for removing only remaining long utterence --- .../local/remove_test_utterances_from_lob.py | 21 ++++++++++++++++++- 1 file changed, 
20 insertions(+), 1 deletion(-) diff --git a/egs/iam/v2/local/remove_test_utterances_from_lob.py b/egs/iam/v2/local/remove_test_utterances_from_lob.py index bf3c72e1e2e..6d8c0780b37 100755 --- a/egs/iam/v2/local/remove_test_utterances_from_lob.py +++ b/egs/iam/v2/local/remove_test_utterances_from_lob.py @@ -91,7 +91,7 @@ def read_utterances(text_file_path): found_line = False # avoiding very small utterance, it causes removing # complete lob text - if len(line_to_find) < 13: + if len(line_to_find) < 10: remaining_utterances[line_id] = line_to_find else: for i in range(1, (len(corpus_text_lowercase_wo_sc) - 2)): @@ -109,6 +109,25 @@ def read_utterances(text_file_path): if not found_line: remaining_utterances[line_id] = line_to_find +# removing long utterances not found above +row_to_keep[87530] = False; row_to_keep[87531] = False; row_to_keep[87532] = False +row_to_keep[31724] = False; row_to_keep[31725] = False; row_to_keep[31726] = False +row_to_keep[16704] = False; row_to_keep[16705] = False; row_to_keep[16706] = False; +row_to_keep[94181] = False; row_to_keep[94182] = False; row_to_keep[94183] = False; +row_to_keep[20171] = False; row_to_keep[20172] = False; row_to_keep[20173] = False; +row_to_keep[16734] = False; row_to_keep[16733] = False; row_to_keep[16732] = False; +row_to_keep[20576] = False; row_to_keep[20577] = False; row_to_keep[20578] = False; +row_to_keep[31715] = False; row_to_keep[31716] = False; row_to_keep[31717] = False; +row_to_keep[31808] = False; row_to_keep[31809] = False; row_to_keep[31810] = False; +row_to_keep[31822] = False; row_to_keep[31823] = False; row_to_keep[31824] = False; +row_to_keep[88791] = False; row_to_keep[88792] = False; row_to_keep[88793] = False; +row_to_keep[31745] = False; row_to_keep[31746] = False; row_to_keep[31825] = False; +row_to_keep[94256] = False; row_to_keep[94257] = False; row_to_keep[88794] = False; +row_to_keep[88665] = False; row_to_keep[17093] = False; row_to_keep[17094] = False; +row_to_keep[20586] = False; row_to_keep[87228] = False; row_to_keep[87229] = False; +row_to_keep[16744] = False; row_to_keep[87905] = False; row_to_keep[87906] = False; +row_to_keep[16669] = False; row_to_keep[16670] = False; row_to_keep[16719] = False; +row_to_keep[87515] = False; row_to_keep[20090] = False; row_to_keep[31748] = False; for i in range(len(original_corpus_text)): transcript = original_corpus_text[i].strip() if row_to_keep[i]: From ba07ff0171e6894fe7f5daac813f971564eb5b3e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 12 Sep 2018 02:04:57 -0400 Subject: [PATCH 36/37] fix in hardcoding --- egs/iam/v2/local/remove_test_utterances_from_lob.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/egs/iam/v2/local/remove_test_utterances_from_lob.py b/egs/iam/v2/local/remove_test_utterances_from_lob.py index 6d8c0780b37..5e5dac52818 100755 --- a/egs/iam/v2/local/remove_test_utterances_from_lob.py +++ b/egs/iam/v2/local/remove_test_utterances_from_lob.py @@ -27,6 +27,8 @@ def remove_punctuations(transcript): continue if char == '(' or char == ':' or char == ';' or char == '"': continue + if char == '*': + continue char_list.append(char) return char_list @@ -110,8 +112,8 @@ def read_utterances(text_file_path): remaining_utterances[line_id] = line_to_find # removing long utterances not found above -row_to_keep[87530] = False; row_to_keep[87531] = False; row_to_keep[87532] = False -row_to_keep[31724] = False; row_to_keep[31725] = False; row_to_keep[31726] = False +row_to_keep[87530] = False; row_to_keep[87531] = False; 
row_to_keep[87532] = False; +row_to_keep[31724] = False; row_to_keep[31725] = False; row_to_keep[31726] = False; row_to_keep[16704] = False; row_to_keep[16705] = False; row_to_keep[16706] = False; row_to_keep[94181] = False; row_to_keep[94182] = False; row_to_keep[94183] = False; row_to_keep[20171] = False; row_to_keep[20172] = False; row_to_keep[20173] = False; From 53984120eebedf853fd2c24f640aa7208112a1df Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 12 Sep 2018 17:20:09 -0400 Subject: [PATCH 37/37] modification from review --- egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 7395781dd96..e7d9246fb89 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -1,9 +1,8 @@ #!/bin/bash -# e2eali_1c is the same as e2eali_1c but has more CNN layers, different filter size -# smaller lm-opts, less epochs, more initial/finaljobs, less minibatch, frams-per-iter. +# e2eali_1d is the same as e2eali_1c but has more CNN layers, different filter sizes, +# smaller lm-opts, minibatch and frames-per-iter, fewer epochs, and more initial/final jobs. -# This script does end2end chain training (i.e. from scratch) # local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d # System e2e_cnn_1b cnn_e2eali_1d # WER 13.91 8.80