diff --git a/egs/iam/v1/local/unk_arc_post_to_transcription.py b/egs/iam/v1/local/unk_arc_post_to_transcription.py
index c5ad1235427..f8b69820601 100755
--- a/egs/iam/v1/local/unk_arc_post_to_transcription.py
+++ b/egs/iam/v1/local/unk_arc_post_to_transcription.py
@@ -1,88 +1,107 @@
 #!/usr/bin/env python3
-# Copyright 2017 Ashish Arora
+#Copyright 2017 Ashish Arora
+""" This module is used by scripts for the open-vocabulary setup.
+ If the hypothesis transcription contains <unk>, it replaces the <unk>
+ with the word predicted by the unk-model, obtained by concatenating the
+ phones decoded from the unk-model. Currently only the triphone setup is supported.
+ Args:
+  phones: File name of the phones.txt symbol table for phones, i.e.
+          phone and phone-id per line. E.g. "a 217": the phone-id of 'a' is 217.
+  words: File name of the words.txt symbol table for words, i.e.
+         word and word-id per line. E.g. "ACCOUNTANCY 234": the word-id of 'ACCOUNTANCY' is 234.
+  unk: ID of <unk>. E.g. 231.
+  one-best-arc-post: A file in arc-post format, i.e. a list of timing info and posteriors
+                     of arcs along the one-best path from the lattice.
+                     E.g. 506_m01-049-00 8 12 1 7722 282 272 288 231
+                     []
+                     [ ...]
+  output-text: File containing the hypothesis transcription, with each <unk> replaced by
+               the word recognized by the unk-model.
+               E.g. A move to stop mr. gaitskell.
+
+ Eg. local/unk_arc_post_to_transcription.py lang/phones.txt lang/words.txt
+     data/lang/oov.int
+"""
 import argparse
+import os
 import sys
-
 parser = argparse.ArgumentParser(description="""uses phones to convert unk to word""")
-parser.add_argument('phones', type=str, help='phones and phonesID')
-parser.add_argument('words', type=str, help='word and wordID')
-parser.add_argument('unk', type=str, default='-', help='location of unk file')
-parser.add_argument('--input-ark', type=str, default='-', help='where to read the input data')
-parser.add_argument('--out-ark', type=str, default='-', help='where to write the output data')
+parser.add_argument('phones', type=str, help='File name of a file that contains the'
+                    ' symbol table for phones. Each line must be: <phone> <phone-id>')
+parser.add_argument('words', type=str, help='File name of a file that contains the'
+                    ' symbol table for words. Each line must be: <word> <word-id>')
+parser.add_argument('unk', type=str, default='-', help='File name of a file that'
+                    ' contains the ID of <unk>. The content must be: <id of unk>, e.g.
231') +parser.add_argument('--one-best-arc-post', type=str, default='-', help='A file in arc-post' + 'format, which is a list of timing info and posterior of arcs' + 'along the one-best path from the lattice') +parser.add_argument('--output-text', type=str, default='-', help='File containing' + 'hypothesis transcription with recognized by the unk-model') args = parser.parse_args() - ### main ### -phone_fh = open(args.phones, 'r', encoding='latin-1') -word_fh = open(args.words, 'r', encoding='latin-1') -unk_fh = open(args.unk, 'r', encoding='latin-1') -if args.input_ark == '-': - input_fh = sys.stdin +phone_handle = open(args.phones, 'r', encoding='latin-1') # Create file handles +word_handle = open(args.words, 'r', encoding='latin-1') +unk_handle = open(args.unk,'r', encoding='latin-1') +if args.one_best_arc_post == '-': + arc_post_handle = sys.stdin else: - input_fh = open(args.input_ark, 'r', encoding='latin-1') -if args.out_ark == '-': - out_fh = sys.stdout + arc_post_handle = open(args.one_best_arc_post, 'r', encoding='latin-1') +if args.output_text == '-': + output_text_handle = sys.stdout else: - out_fh = open(args.out_ark, 'w', encoding='latin-1') + output_text_handle = open(args.output_text, 'w', encoding='latin-1') -phone_dict = dict() # Stores phoneID and phone mapping -phone_data_vect = phone_fh.read().strip().split("\n") -for key_val in phone_data_vect: +id2phone = dict() # Stores the mapping from phone_id (int) to phone (char) +phones_data = phone_handle.read().strip().split("\n") + +for key_val in phones_data: key_val = key_val.split(" ") - phone_dict[key_val[1]] = key_val[0] + id2phone[key_val[1]] = key_val[0] + word_dict = dict() -word_data_vect = word_fh.read().strip().split("\n") +word_data_vect = word_handle.read().strip().split("\n") + for key_val in word_data_vect: key_val = key_val.split(" ") word_dict[key_val[1]] = key_val[0] -unk_val = unk_fh.read().strip().split(" ")[0] +unk_val = unk_handle.read().strip().split(" ")[0] -utt_word_dict = dict() -utt_phone_dict = dict() # Stores utteranceID and phoneID -unk_word_dict = dict() -count=0 -for line in input_fh: +utt_word_dict = dict() # Dict of list, stores mapping from utteranceID(int) to words(str) +for line in arc_post_handle: line_vect = line.strip().split("\t") - if len(line_vect) < 6: - print("Bad line: '{}' Expecting 6 fields. Skipping...".format(line), + if len(line_vect) < 6: # Check for 1best-arc-post output + print("Error: Bad line: '{}' Expecting 6 fields. Skipping...".format(line), file=sys.stderr) continue - uttID = line_vect[0] + utt_id = line_vect[0] word = line_vect[4] phones = line_vect[5] - if uttID in utt_word_dict.keys(): - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - else: - count = 0 - utt_word_dict[uttID] = dict() - utt_phone_dict[uttID] = dict() - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - if word == unk_val: # Get character sequence for unk - phone_key_vect = phones.split(" ") - phone_val_vect = list() - for pkey in phone_key_vect: - phone_val_vect.append(phone_dict[pkey]) + if utt_id not in list(utt_word_dict.keys()): + utt_word_dict[utt_id] = list() + + if word == unk_val: # Get the 1best phone sequence given by the unk-model + phone_id_seq = phones.split(" ") + phone_seq = list() + for pkey in phone_id_seq: + phone_seq.append(id2phone[pkey]) # Convert the phone-id sequence to a phone sequence. 
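For reference, the <unk> branch above can be exercised in isolation. The sketch below mimics it with an invented symbol table and phone-id sequence (it is not part of the patch): phone IDs are looked up in id2phone, the word-position markers (_B/_I/_E/_S) are stripped, and the remaining characters are concatenated into a word.

# Minimal sketch of the <unk> branch above; id2phone and the IDs are invented examples.
id2phone = {"217": "a_B", "218": "c_I", "219": "t_E"}

def unk_phones_to_word(phone_id_seq):
    """Convert a space-separated phone-id string into a word by dropping the
    word-position suffix of each phone and concatenating what remains."""
    phones = [id2phone[pid] for pid in phone_id_seq.split()]
    return ''.join(p.split('_')[0] for p in phones)

print(unk_phones_to_word("217 218 219"))  # -> act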
phone_2_word = list() - for phone_val in phone_val_vect: - phone_2_word.append(phone_val.split('_')[0]) - phone_2_word = ''.join(phone_2_word) - utt_word_dict[uttID][count] = phone_2_word + for phone_val in phone_seq: + phone_2_word.append(phone_val.split('_')[0]) # Removing the world-position markers(e.g. _B) + phone_2_word = ''.join(phone_2_word) # Concatnate phone sequence + utt_word_dict[utt_id].append(phone_2_word) # Store word from unk-model else: - if word == '0': + if word == '0': # Store space/silence word_val = ' ' else: word_val = word_dict[word] - utt_word_dict[uttID][count] = word_val - count += 1 + utt_word_dict[utt_id].append(word_val) # Store word from 1best-arc-post -transcription = "" -for key in sorted(utt_word_dict.keys()): - transcription = key - for index in sorted(utt_word_dict[key].keys()): - value = utt_word_dict[key][index] - transcription = transcription + " " + value - out_fh.write(transcription + '\n') +transcription = "" # Output transcription +for utt_key in sorted(utt_word_dict.keys()): + transcription = utt_key + for word in utt_word_dict[utt_key]: + transcription = transcription + " " + word + output_text_handle.write(transcription + '\n') diff --git a/egs/iam/v2/cmd.sh b/egs/iam/v2/cmd.sh old mode 100644 new mode 100755 diff --git a/egs/iam/v2/local/augment_data.sh b/egs/iam/v2/local/augment_data.sh new file mode 100755 index 00000000000..31e4a8217ca --- /dev/null +++ b/egs/iam/v2/local/augment_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2018 Hossein Hadian +# 2018 Ashish Arora + +# Apache 2.0 +# This script performs data augmentation. + +nj=4 +cmd=run.pl +feat_dim=40 +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +srcdir=$1 +outdir=$2 +datadir=$3 +aug_set=aug1 +mkdir -p $datadir/augmentations +echo "copying $srcdir to $datadir/augmentations/$aug_set, allowed length, creating feats.scp" + +for set in $aug_set; do + image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ + $srcdir $datadir/augmentations/$set + cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ + --fliplr false --augment true $datadir/augmentations/$set +done + +echo " combine original data and data from different augmentations" +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/$aug_set +cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/iam/v2/local/chain/compare_wer.sh b/egs/iam/v2/local/chain/compare_wer.sh index d4076457463..2ce14e13694 100755 --- a/egs/iam/v2/local/chain/compare_wer.sh +++ b/egs/iam/v2/local/chain/compare_wer.sh @@ -50,6 +50,36 @@ for x in $*; do done echo +echo -n "# WER val " +for x in $*; do + wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) val " +for x in $*; do + wer="--" + [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_val_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER val " +for x in $*; do + cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) val " +for x in $*; do + cer="--" + [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_val_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + if $used_epochs; then exit 0; # the diagnostics aren't comparable between regular and discriminatively trained 
systems. fi diff --git a/egs/iam/v2/local/chain/run_cnn_e2eali.sh b/egs/iam/v2/local/chain/run_cnn_e2eali.sh index ad51803ab0e..da731bcb0b1 120000 --- a/egs/iam/v2/local/chain/run_cnn_e2eali.sh +++ b/egs/iam/v2/local/chain/run_cnn_e2eali.sh @@ -1 +1 @@ -tuning/run_cnn_e2eali_1c.sh \ No newline at end of file +tuning/run_cnn_e2eali_1d.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/run_e2e_cnn.sh b/egs/iam/v2/local/chain/run_e2e_cnn.sh deleted file mode 100755 index 15bdf610cd3..00000000000 --- a/egs/iam/v2/local/chain/run_e2e_cnn.sh +++ /dev/null @@ -1,174 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian - -# This script does end2end chain training (i.e. from scratch) - -# local/chain/compare_wer.sh exp/chain/cnn_1a exp/chain/cnn_chainali_1c exp/chain/e2e_cnn_1a -# System cnn_1a cnn_chainali_1c e2e_cnn_1a -# WER 18.52 12.72 12.15 -# CER 10.07 5.99 6.03 -# Final train prob -0.0077 -0.0291 -0.0371 -# Final valid prob -0.0970 -0.0359 -0.0636 -# Final train prob (xent) -0.5484 -0.9781 -# Final valid prob (xent) -0.9643 -1.1544 -# Parameters 4.36M 3.96M 9.13M - -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1a: num-iters=21 nj=2..4 num-params=9.1M dim=40->12640 combine=-0.033->-0.033 (over 1) logprob:train/valid[13,20,final]=(-0.058,-0.042,-0.035/-0.070,-0.064,-0.059) - -set -e - -# configs for 'chain' -stage=0 -train_stage=-10 -get_egs_stage=-10 -affix=1a - -# training options -tdnn_dim=450 -num_epochs=4 -num_jobs_initial=2 -num_jobs_final=4 -minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 -common_egs_dir= -l2_regularize=0.00005 -frames_per_iter=1000000 -cmvn_opts="--norm-means=true --norm-vars=true" -train_set=train -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! 
cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 1 ]; then - steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ - --shared-phones true \ - --type biphone \ - data/$train_set $lang $treedir - $cmd $treedir/log/make_phone_lm.log \ - cat data/$train_set/text \| \ - steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ - utils/sym2int.pl -f 2- data/lang/phones.txt \| \ - chain-est-phone-lm --num-extra-lm-states=500 \ - ark:- $treedir/phone_lm.fst -fi - -if [ $stage -le 2 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - - cnn_opts="l2-regularize=0.075" - tdnn_opts="l2-regularize=0.075" - output_opts="l2-regularize=0.1" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs -fi - -if [ $stage -le 3 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. 
- - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ - --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ - --chain.apply-deriv-weights false \ - --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ - --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 4 \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.shrink-value 1.0 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 30 --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/run_e2e_cnn.sh b/egs/iam/v2/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..7dca9c30e23 --- /dev/null +++ b/egs/iam/v2/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1b.sh \ No newline at end of file diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh index ba28f681708..a80bb02290b 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -22,6 +22,7 @@ stage=0 nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -42,7 +43,9 @@ tdnn_dim=450 # training options srand=0 remove_egs=true -lang_test=lang_unk +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -228,18 +231,26 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh index 298e7053086..6615c4669d6 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -23,6 +23,7 @@ stage=0 nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -45,6 +46,7 @@ srand=0 remove_egs=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -237,15 +239,20 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh index 48e0a76dead..f44c073635e 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -25,6 +25,7 @@ stage=0 nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. 
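The decode_val option added here produces decode_val directories whose scoring_kaldi/best_wer files compare_wer.sh (extended earlier in this patch) reads with awk '{print $2}'. A rough Python equivalent of that lookup, assuming best_wer holds the usual single "%WER <number> [ ... ]" line that Kaldi scoring writes (the path below is only an example):

# Rough Python counterpart of the WER extraction done in compare_wer.sh.
def read_best_wer(best_wer_path):
    with open(best_wer_path) as f:
        fields = f.readline().split()
    return fields[1] if len(fields) > 1 else "--"

# read_best_wer("exp/chain/cnn_e2eali_1d/decode_val/scoring_kaldi/best_wer")  # e.g. "8.80"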
affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -47,7 +48,7 @@ srand=0 remove_egs=true lang_decode=data/lang lang_rescore=data/lang_rescore_6g - +if $decode_val; then maybe_val=val; else maybe_val= ; fi dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. echo "$0 $@" # Print the command line for logging @@ -239,15 +240,20 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh new file mode 100755 index 00000000000..e7d9246fb89 --- /dev/null +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -0,0 +1,259 @@ +#!/bin/bash + +# e2eali_1d is the same as e2eali_1c but has more CNN layers, different filter size +# smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs. + +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ exp/chain/cnn_e2eali_1d +# System e2e_cnn_1b cnn_e2eali_1d +# WER 13.91 8.80 +# WER (rescored) 13.64 8.52 +# CER 7.08 4.06 +# CER (rescored) 6.82 3.98 +# Final train prob 0.0148 -0.0524 +# Final valid prob 0.0105 -0.0713 +# Final train prob (xent) -0.4695 +# Final valid prob (xent) -0.5310 +# Parameters 9.52M 4.36M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1d +# exp/chain/cnn_e2eali_1d: num-iters=30 nj=3..5 num-params=4.4M dim=40->400 combine=-0.055->-0.055 (over 1) xent:train/valid[19,29,final]=(-0.683,-0.489,-0.469/-0.703,-0.544,-0.531) logprob:train/valid[19,29,final]=(-0.090,-0.057,-0.052/-0.107,-0.076,-0.071) +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +decode_val=true +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1b +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. 
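The dropout_schedule='0,0@0.20,0.2@0.50,0' used by these e2eali recipes is, as I read Kaldi's nnet3 training scripts, a list of value@data-fraction breakpoints with linear interpolation between them (the first and last entries defaulting to fractions 0.0 and 1.0): dropout stays at 0 for the first 20% of training, ramps up to 0.2 at 50%, then decays back to 0. A small sketch under that assumption:

# Sketch of the assumed dropout-schedule semantics; verify against
# steps/libs/nnet3/train/common.py before relying on it.
def dropout_at(schedule, data_fraction):
    points = []
    for i, piece in enumerate(schedule.split(',')):
        if '@' in piece:
            value, frac = piece.split('@')
        else:
            value, frac = piece, ('0.0' if i == 0 else '1.0')
        points.append((float(frac), float(value)))
    points.sort()
    # Linear interpolation between neighbouring breakpoints.
    for (f0, v0), (f1, v1) in zip(points, points[1:]):
        if f0 <= data_fraction <= f1:
            return v0 + (v1 - v0) * (data_fraction - f0) / max(f1 - f0, 1e-12)
    return points[-1][1]

print(dropout_at('0,0@0.20,0.2@0.50,0', 0.35))  # ~0.1, halfway up the ramp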
+chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=550 +# training options +srand=0 +remove_egs=true +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This 
block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=true \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=5 \ + --trainer.frames-per-iter=1500000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
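As a quick numeric check of the learning-rate-factor comment in the xconfig block above: with xent_regularize=0.1, the xent output layer gets a factor of 0.5/0.1 = 5, which is what the $(echo "print 0.5/$xent_regularize" | python) line computes.

# Worked example of the xent learning-rate factor described above.
xent_regularize = 0.1
learning_rate_factor = 0.5 / xent_regularize
print(learning_rate_factor)  # 5.0: the xent branch learns at a rate independent of the regularizer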
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done +fi + + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh new file mode 100755 index 00000000000..cb2bfa0a82d --- /dev/null +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. from scratch) +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# WER 11.24 +# WER (rescored) 10.80 +# CER 5.32 +# CER (rescored) 5.24 +# Final train prob 0.0568 +# Final valid prob 0.0381 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 9.13M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.1M dim=40->12640 combine=0.049->0.049 (over 1) logprob:train/valid[27,41,final]=(0.035,0.055,0.057/0.016,0.037,0.038) + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1a +nj=30 + +# training options +tdnn_dim=450 +num_epochs=4 +num_jobs_initial=2 +num_jobs_final=4 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +common_egs_dir= +l2_regularize=0.00005 +frames_per_iter=1000000 +cmvn_opts="--norm-means=true --norm-vars=true" +train_set=train +decode_val=true +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + --shared-phones true \ + --type biphone \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
+ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize $l2_regularize \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter $frames_per_iter \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh new file mode 100755 index 00000000000..d5f79602695 --- /dev/null +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. from scratch) +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1b/ +# System e2e_cnn_1b +# WER 13.59 +# WER (rescored) 13.27 +# CER 6.92 +# CER (rescored) 6.71 +# Final train prob 0.0345 +# Final valid prob 0.0269 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 9.52M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1b +# exp/chain/e2e_cnn_1b: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=0.041->0.041 (over 2) logprob:train/valid[27,41,final]=(0.032,0.035,0.035/0.025,0.026,0.027) +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1b +nj=30 + +# training options +tdnn_dim=450 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +common_egs_dir= +train_set=train +decode_val=true +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + --shared-phones true \ + --type biphone \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. + + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1000000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v2/local/extract_features.sh b/egs/iam/v2/local/extract_features.sh new file mode 100755 index 00000000000..1741ad3f9b2 --- /dev/null +++ b/egs/iam/v2/local/extract_features.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment=false +fliplr=false +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + local/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \ + --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/iam/v2/local/gen_topo.py b/egs/iam/v2/local/gen_topo.py new file mode 100755 index 00000000000..540bfbcf270 --- /dev/null +++ b/egs/iam/v2/local/gen_topo.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2017 (author: Chun-Chieh Chang) + +# Generate a topology file. This allows control of the number of states in the +# non-silence HMMs, and in the silence HMMs. This is a modified version of +# 'utils/gen_topo.pl'. The difference is that this creates two topologies for +# the non-silence HMMs. The number of states for punctuations is different than +# the number of states for other characters. + +from __future__ import print_function +import argparse +import string + +parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " + " " + "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", + epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); +parser.add_argument("num_nonsil_states", type=int, help="number of states for nonsilence phones"); +parser.add_argument("num_sil_states", type=int, help="number of states for silence phones"); +parser.add_argument("num_punctuation_states", type=int, help="number of states for punctuation"); +parser.add_argument("nonsilence_phones", type=str, + help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); +parser.add_argument("silence_phones", type=str, + help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); +parser.add_argument("phone_list", type=str, help="file containing all phones and their corresponding number."); + +args = parser.parse_args() + +silence_phones = [ int(x) for x in args.silence_phones.split(":") ] +nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] +all_phones = silence_phones + nonsilence_phones + +punctuation_phones = [] +exclude = set("!(),.?;:'-\"") +with open(args.phone_list) as f: + for line in f: + line = line.strip() + phone = line.split(' ')[0] + if len(phone) == 1 and phone in exclude: + punctuation_phones.append(int(line.split(' ')[1])) +# For nonsilence phones that are not punctuations +print("") +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x not in punctuation_phones])) +print("") +for x in range(0, args.num_nonsil_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_nonsil_states) + " ") +print("") + +# For nonsilence phones that ar punctuations +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x in punctuation_phones])) +print("") +for x in range(0, args.num_punctuation_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_punctuation_states) + " ") +print("") + +# For silence phones +print("") +print("") +print(" ".join([str(x) for x in silence_phones])) +print("") +if(args.num_sil_states > 1): + transp = 1.0 / (args.num_sil_states - 1) + + state_str = " 0 0 " + for x in range(0, (args.num_sil_states - 1)): + state_str = state_str + " " + str(x) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + + for x in range(1, (args.num_sil_states - 1)): + state_str = " " + str(x) + " " + str(x) + " " + for y in range(1, args.num_sil_states): + state_str = state_str + " " + str(y) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + second_last = args.num_sil_states - 1 + print(" " + str(second_last) + " " + str(second_last) + " " + str(second_last) + " 0.75 " + str(args.num_sil_states) + " 0.25 ") + print(" " + str(args.num_sil_states) + " ") +else: + print(" 0 0 0 0.75 1 0.25 ") + print(" " + str(args.num_sil_states) + " ") +print("") +print("") diff --git a/egs/iam/v2/local/make_features.py b/egs/iam/v2/local/make_features.py index 84e012daedb..3ce501732cf 100755 --- a/egs/iam/v2/local/make_features.py +++ b/egs/iam/v2/local/make_features.py @@ -2,6 +2,7 @@ # Copyright 2017 Chun Chieh Chang # 2017 Ashish Arora +# 2017 Yiwen Shao # 2018 Hossein Hadian """ This script converts images to Kaldi-format feature matrices. The input to @@ -14,20 +15,27 @@ to enforce the images to have the specified length in that file by padding white pixels (the --padding option will be ignored in this case). This relates to end2end chain training. - eg. 
local/make_features.py data/train --feat-dim 40 """ - +import random import argparse import os import sys +import scipy.io as sio import numpy as np from scipy import misc +from scipy.ndimage.interpolation import affine_transform +import math +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and writes them to standard output in text format.""") -parser.add_argument('dir', type=str, - help='Source data directory (containing images.scp)') +parser.add_argument('images_scp_path', type=str, + help='Path of images.scp file') +parser.add_argument('--allowed_len_file_path', type=str, default=None, + help='If supplied, each images will be padded to reach the ' + 'target length (this overrides --padding).') parser.add_argument('--out-ark', type=str, default='-', help='Where to write the output feature file') parser.add_argument('--feat-dim', type=int, default=40, @@ -35,8 +43,10 @@ parser.add_argument('--padding', type=int, default=5, help='Number of white pixels to pad on the left' 'and right side of the image.') - - +parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False, + help="Flip the image left-right for right to left languages") +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() @@ -56,18 +66,12 @@ def write_kaldi_matrix(file_handle, matrix, key): file_handle.write("\n") file_handle.write(" ]\n") -def get_scaled_image(im, allowed_lengths = None): - scale_size = args.feat_dim - sx = im.shape[1] - sy = im.shape[0] - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) + +def horizontal_pad(im, allowed_lengths = None): if allowed_lengths is None: left_padding = right_padding = args.padding else: # Find an allowed length for the image - imlen = im.shape[1] + imlen = im.shape[1] # width allowed_len = 0 for l in allowed_lengths: if l > imlen: @@ -77,28 +81,153 @@ def get_scaled_image(im, allowed_lengths = None): # No allowed length was found for the image (the image is too long) return None padding = allowed_len - imlen - left_padding = padding // 2 + left_padding = int(padding // 2) right_padding = padding - left_padding - dim_y = im.shape[0] + dim_y = im.shape[0] # height im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), dtype=int), im), axis=1) im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), dtype=int)), axis=1) return im_pad1 -### main ### -data_list_path = os.path.join(args.dir, 'images.scp') +def get_scaled_image_aug(im, mode='normal'): + scale_size = args.feat_dim + sx = im.shape[1] + sy = im.shape[0] + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + scale_size = random.randint(10, 30) + scale = (1.0 * scale_size) / sy + down_nx = int(scale_size) + down_ny = int(scale * sx) + if mode == 'normal': + im = misc.imresize(im, (nx, ny)) + return im + else: + im_scaled_down = misc.imresize(im, (down_nx, down_ny)) + im_scaled_up = misc.imresize(im_scaled_down, (nx, ny)) + return im_scaled_up + return im + +def contrast_normalization(im, low_pct, high_pct): + element_number = im.size + rows = im.shape[0] + cols = im.shape[1] + im_contrast = np.zeros(shape=im.shape) + low_index = int(low_pct * element_number) + high_index = int(high_pct * element_number) + sorted_im = np.sort(im, axis=None) + low_thred = 
sorted_im[low_index] + high_thred = sorted_im[high_index] + for i in range(rows): + for j in range(cols): + if im[i, j] > high_thred: + im_contrast[i, j] = 255 # lightest to white + elif im[i, j] < low_thred: + im_contrast[i, j] = 0 # darkest to black + else: + # linear normalization + im_contrast[i, j] = (im[i, j] - low_thred) * \ + 255 / (high_thred - low_thred) + return im_contrast + + +def geometric_moment(frame, p, q): + m = 0 + for i in range(frame.shape[1]): + for j in range(frame.shape[0]): + m += (i ** p) * (j ** q) * frame[i][i] + return m + + +def central_moment(frame, p, q): + u = 0 + x_bar = geometric_moment(frame, 1, 0) / \ + geometric_moment(frame, 0, 0) # m10/m00 + y_bar = geometric_moment(frame, 0, 1) / \ + geometric_moment(frame, 0, 0) # m01/m00 + for i in range(frame.shape[1]): + for j in range(frame.shape[0]): + u += ((i - x_bar)**p) * ((j - y_bar)**q) * frame[i][j] + return u + + +def height_normalization(frame, w, h): + frame_normalized = np.zeros(shape=(h, w)) + alpha = 4 + x_bar = geometric_moment(frame, 1, 0) / \ + geometric_moment(frame, 0, 0) # m10/m00 + y_bar = geometric_moment(frame, 0, 1) / \ + geometric_moment(frame, 0, 0) # m01/m00 + sigma_x = (alpha * ((central_moment(frame, 2, 0) / + geometric_moment(frame, 0, 0)) ** .5)) # alpha * sqrt(u20/m00) + sigma_y = (alpha * ((central_moment(frame, 0, 2) / + geometric_moment(frame, 0, 0)) ** .5)) # alpha * sqrt(u02/m00) + for x in range(w): + for y in range(h): + i = int((x / w - 0.5) * sigma_x + x_bar) + j = int((y / h - 0.5) * sigma_y + y_bar) + frame_normalized[x][y] = frame[i][j] + return frame_normalized + +def find_slant_project(im): + rows = im.shape[0] + cols = im.shape[1] + std_max = 0 + alpha_max = 0 + col_disp = np.zeros(90, int) + proj = np.zeros(shape=(90, cols + 2 * rows), dtype=int) + for r in range(rows): + for alpha in range(-45, 45, 1): + col_disp[alpha] = int(r * math.tan(alpha / 180.0 * math.pi)) + for c in range(cols): + if im[r, c] < 100: + for alpha in range(-45, 45, 1): + proj[alpha + 45, c + col_disp[alpha] + rows] += 1 + for alpha in range(-45, 45, 1): + proj_histogram, bin_array = np.histogram(proj[alpha + 45, :], bins=10) + proj_std = np.std(proj_histogram) + if proj_std > std_max: + std_max = proj_std + alpha_max = alpha + proj_std = np.std(proj, axis=1) + return -alpha_max + + +def horizontal_shear(im, degree): + rad = degree / 180.0 * math.pi + padding_x = int(abs(np.tan(rad)) * im.shape[0]) + padding_y = im.shape[0] + if rad > 0: + im_pad = np.concatenate( + (255 * np.ones((padding_y, padding_x), dtype=int), im), axis=1) + elif rad < 0: + im_pad = np.concatenate( + (im, 255 * np.ones((padding_y, padding_x), dtype=int)), axis=1) + else: + im_pad = im + shear_matrix = np.array([[1, 0], + [np.tan(rad), 1]]) + sheared_im = affine_transform(im_pad, shear_matrix, cval=255.0) + return sheared_im + + +### main ### +random.seed(1) +data_list_path = args.images_scp_path if args.out_ark == '-': out_fh = sys.stdout else: - out_fh = open(args.out_ark,'wb') + out_fh = open(args.out_ark,'w') allowed_lengths = None -if os.path.isfile(os.path.join(args.dir, 'allowed_lengths.txt')): +allowed_len_handle = args.allowed_len_file_path +if os.path.isfile(allowed_len_handle): print("Found 'allowed_lengths.txt' file...", file=sys.stderr) allowed_lengths = [] - with open(os.path.join(args.dir,'allowed_lengths.txt')) as f: + with open(allowed_len_handle) as f: for line in f: allowed_lengths.append(int(line.strip())) print("Read {} allowed lengths and will apply them to the " @@ -106,6 +235,7 @@ def 
get_scaled_image(im, allowed_lengths = None): num_fail = 0 num_ok = 0 +aug_setting = ['normal', 'scaled'] with open(data_list_path) as f: for line in f: line = line.strip() @@ -113,15 +243,24 @@ def get_scaled_image(im, allowed_lengths = None): image_id = line_vect[0] image_path = line_vect[1] im = misc.imread(image_path) - im_scaled = get_scaled_image(im, allowed_lengths) - - if im_scaled is None: + if args.fliplr: + im = np.fliplr(im) + if args.augment: + im_aug = get_scaled_image_aug(im, aug_setting[0]) + im_contrast = contrast_normalization(im_aug, 0.05, 0.2) + slant_degree = find_slant_project(im_contrast) + im_sheared = horizontal_shear(im_contrast, slant_degree) + im_aug = im_sheared + else: + im_aug = get_scaled_image_aug(im, aug_setting[0]) + im_horizontal_padded = horizontal_pad(im_aug, allowed_lengths) + if im_horizontal_padded is None: num_fail += 1 continue - data = np.transpose(im_scaled, (1, 0)) + data = np.transpose(im_horizontal_padded, (1, 0)) data = np.divide(data, 255.0) num_ok += 1 write_kaldi_matrix(out_fh, data, image_id) -print('Generated features for {} images. Failed for {} (iamge too ' +print('Generated features for {} images. Failed for {} (image too ' 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh index 73d711c73f0..cf729d9a939 100755 --- a/egs/iam/v2/local/prepare_data.sh +++ b/egs/iam/v2/local/prepare_data.sh @@ -18,6 +18,7 @@ stage=0 download_dir=data/download +process_aachen_split=false wellington_dir= username= password= # username and password for downloading the IAM database @@ -53,6 +54,8 @@ ascii_url=http://www.fki.inf.unibe.ch/DBs/iamDB/data/ascii/ascii.tgz brown_corpus_url=http://www.sls.hawaii.edu/bley-vroman/brown.txt lob_corpus_url=http://ota.ox.ac.uk/text/0167.zip wellington_corpus_loc=/export/corpora5/Wellington/WWC/ +aachen_split_url=http://www.openslr.org/resources/56/splits.zip +aachen_splits=data/local/aachensplits mkdir -p $download_dir data/local # download and extact images and transcription @@ -144,6 +147,19 @@ else echo "$0: Wellington Corpus not included because wellington_dir not provided" fi +if [ -d $aachen_splits ]; then + echo "$0: Not downloading the Aachen splits as it is already there." +else + if [ ! -f $aachen_splits/splits.zip ]; then + echo "$0: Downloading Aachen splits ..." 
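Returning to local/make_features.py above: the horizontal_pad step picks the first allowed length larger than the image width and splits the white-pixel padding between the two sides. A standalone sketch of that padding rule follows (toy sizes, not part of the patch; allowed_lengths is assumed sorted ascending, as in allowed_lengths.txt):

import numpy as np

# Pad an image to the first allowed length larger than its width, mirroring
# horizontal_pad in local/make_features.py; sizes below are toy values.
def pad_to_allowed_length(im, allowed_lengths):
    width = im.shape[1]
    target = next((l for l in allowed_lengths if l > width), None)
    if target is None:
        return None                      # image too long; the caller skips it
    left = (target - width) // 2
    right = (target - width) - left
    white = lambda n: 255 * np.ones((im.shape[0], n), dtype=im.dtype)
    return np.concatenate((white(left), im, white(right)), axis=1)

toy = 255 * np.ones((40, 93), dtype=np.uint8)
print(pad_to_allowed_length(toy, [80, 100, 120]).shape)  # (40, 100)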
+ mkdir -p $aachen_splits + wget -P $aachen_splits/ $aachen_split_url || exit 1; + fi + unzip $aachen_splits/splits.zip -d $aachen_splits || exit 1; + echo "$0: Done downloading and extracting Aachen splits" +fi + + mkdir -p data/{train,test,val} file_name=largeWriterIndependentTextLineRecognitionTask @@ -160,11 +176,16 @@ cat $train_old > $train_new cat $test_old > $test_new cat $val1_old $val2_old > $val_new -if [ $stage -le 0 ]; then - local/process_data.py data/local data/train --dataset train || exit 1 - local/process_data.py data/local data/test --dataset test || exit 1 - local/process_data.py data/local data/val --dataset validation || exit 1 - - utils/utt2spk_to_spk2utt.pl data/train/utt2spk > data/train/spk2utt - utils/utt2spk_to_spk2utt.pl data/test/utt2spk > data/test/spk2utt +if $process_aachen_split; then + local/process_aachen_splits.py data/local $aachen_splits/splits data/train --dataset train || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/test --dataset test || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/val --dataset validation || exit 1 +else + local/process_data.py data/local data/train --dataset train || exit 1 + local/process_data.py data/local data/test --dataset test || exit 1 + local/process_data.py data/local data/val --dataset validation || exit 1 fi + +image/fix_data_dir.sh data/train +image/fix_data_dir.sh data/test +image/fix_data_dir.sh data/val diff --git a/egs/iam/v2/local/prepend_words.py b/egs/iam/v2/local/prepend_words.py deleted file mode 100755 index d53eb8974bf..00000000000 --- a/egs/iam/v2/local/prepend_words.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# This script, prepend '|' to every words in the transcript to mark -# the beginning of the words for finding the initial-space of every word -# after decoding. - -import sys, io - -infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') -output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') -for line in infile: - output.write(' '.join(["|" + word for word in line.split()]) + '\n') diff --git a/egs/iam/v2/local/process_aachen_splits.py b/egs/iam/v2/local/process_aachen_splits.py new file mode 100755 index 00000000000..cb6a6d4f0d8 --- /dev/null +++ b/egs/iam/v2/local/process_aachen_splits.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +""" This script reads the extracted IAM database files and creates + the following files (for the data subset selected via --dataset): + text, utt2spk, images.scp. + + Eg. local/process_aachen_splits.py data/local data/train data --dataset train + Eg. text file: 000_a01-000u-00 A MOVE to stop Mr. 
Gaitskell from + utt2spk file: 000_a01-000u-00 000 + images.scp file: 000_a01-000u-00 data/local/lines/a01/a01-000u/a01-000u-00.png +""" + +import argparse +import os +import sys +import xml.dom.minidom as minidom + +parser = argparse.ArgumentParser(description="""Creates text, utt2spk + and images.scp files.""") +parser.add_argument('database_path', type=str, + help='Path to the downloaded (and extracted) IAM data') +parser.add_argument('split_path', type=str, + help='location of the train/test/val set') +parser.add_argument('out_dir', type=str, + help='location to write output files.') +parser.add_argument('--dataset', type=str, default='train', + choices=['train', 'test','validation'], + help='Subset of data to process.') +args = parser.parse_args() + +text_file = os.path.join(args.out_dir + '/', 'text') +text_fh = open(text_file, 'w') + +utt2spk_file = os.path.join(args.out_dir + '/', 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w') + +image_file = os.path.join(args.out_dir + '/', 'images.scp') +image_fh = open(image_file, 'w') + +dataset_path = os.path.join(args.split_path, + args.dataset + '.uttlist') + +text_file_path = os.path.join(args.database_path, + 'ascii','lines.txt') +text_dict = {} +def process_text_file_for_word_model(): + with open (text_file_path, 'rt') as in_file: + for line in in_file: + if line[0]=='#': + continue + line = line.strip() + utt_id = line.split(' ')[0] + text_vect = line.split(' ')[8:] + text = "".join(text_vect) + text = text.replace("|", " ") + text_dict[utt_id] = text + + +### main ### + +print("Processing '{}' data...".format(args.dataset)) +process_text_file_for_word_model() + +with open(dataset_path) as f: + for line in f: + line = line.strip() + line_vect = line.split('-') + xml_file = line_vect[0] + '-' + line_vect[1] + xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml') + doc = minidom.parse(xml_path) + form_elements = doc.getElementsByTagName('form')[0] + writer_id = form_elements.getAttribute('writer-id') + outerfolder = form_elements.getAttribute('id')[0:3] + innerfolder = form_elements.getAttribute('id') + lines_path = os.path.join(args.database_path, 'lines', + outerfolder, innerfolder) + for file in os.listdir(lines_path): + if file.endswith(".png"): + image_file_path = os.path.join(lines_path, file) + base_name = os.path.splitext(os.path.basename(image_file_path))[0] + text = text_dict[base_name] + utt_id = writer_id + '_' + base_name + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') diff --git a/egs/iam/v2/local/process_data.py b/egs/iam/v2/local/process_data.py index fa5eb484707..2adae7bf7be 100755 --- a/egs/iam/v2/local/process_data.py +++ b/egs/iam/v2/local/process_data.py @@ -67,7 +67,6 @@ def process_text_file_for_word_model(): xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml') img_num = line[-3:] doc = minidom.parse(xml_path) - form_elements = doc.getElementsByTagName('form')[0] writer_id = form_elements.getAttribute('writer-id') outerfolder = form_elements.getAttribute('id')[0:3] diff --git a/egs/iam/v2/local/remove_test_utterances_from_lob.py b/egs/iam/v2/local/remove_test_utterances_from_lob.py index 1b414ef47f6..5e5dac52818 100755 --- a/egs/iam/v2/local/remove_test_utterances_from_lob.py +++ b/egs/iam/v2/local/remove_test_utterances_from_lob.py @@ -27,6 +27,8 @@ def remove_punctuations(transcript): continue if char == '(' or char == ':' or char == ';' or char == '"': continue + if char == 
'*': + continue char_list.append(char) return char_list @@ -89,22 +91,45 @@ def read_utterances(text_file_path): remaining_utterances = dict() for line_id, line_to_find in utterance_dict.items(): found_line = False - for i in range(1, (len(corpus_text_lowercase_wo_sc) - 2)): - # Combine 3 consecutive lines of the corpus into a single line - prev_words = corpus_text_lowercase_wo_sc[i - 1].strip() - curr_words = corpus_text_lowercase_wo_sc[i].strip() - next_words = corpus_text_lowercase_wo_sc[i + 1].strip() - new_line = prev_words + curr_words + next_words - transcript = ''.join(new_line) - if line_to_find in transcript: - found_line = True - row_to_keep[i-1] = False - row_to_keep[i] = False - row_to_keep[i+1] = False + # avoiding very small utterance, it causes removing + # complete lob text + if len(line_to_find) < 10: + remaining_utterances[line_id] = line_to_find + else: + for i in range(1, (len(corpus_text_lowercase_wo_sc) - 2)): + # Combine 3 consecutive lines of the corpus into a single line + prev_words = corpus_text_lowercase_wo_sc[i - 1].strip() + curr_words = corpus_text_lowercase_wo_sc[i].strip() + next_words = corpus_text_lowercase_wo_sc[i + 1].strip() + new_line = prev_words + curr_words + next_words + transcript = ''.join(new_line) + if line_to_find in transcript: + found_line = True + row_to_keep[i-1] = False + row_to_keep[i] = False + row_to_keep[i+1] = False if not found_line: remaining_utterances[line_id] = line_to_find - +# removing long utterances not found above +row_to_keep[87530] = False; row_to_keep[87531] = False; row_to_keep[87532] = False; +row_to_keep[31724] = False; row_to_keep[31725] = False; row_to_keep[31726] = False; +row_to_keep[16704] = False; row_to_keep[16705] = False; row_to_keep[16706] = False; +row_to_keep[94181] = False; row_to_keep[94182] = False; row_to_keep[94183] = False; +row_to_keep[20171] = False; row_to_keep[20172] = False; row_to_keep[20173] = False; +row_to_keep[16734] = False; row_to_keep[16733] = False; row_to_keep[16732] = False; +row_to_keep[20576] = False; row_to_keep[20577] = False; row_to_keep[20578] = False; +row_to_keep[31715] = False; row_to_keep[31716] = False; row_to_keep[31717] = False; +row_to_keep[31808] = False; row_to_keep[31809] = False; row_to_keep[31810] = False; +row_to_keep[31822] = False; row_to_keep[31823] = False; row_to_keep[31824] = False; +row_to_keep[88791] = False; row_to_keep[88792] = False; row_to_keep[88793] = False; +row_to_keep[31745] = False; row_to_keep[31746] = False; row_to_keep[31825] = False; +row_to_keep[94256] = False; row_to_keep[94257] = False; row_to_keep[88794] = False; +row_to_keep[88665] = False; row_to_keep[17093] = False; row_to_keep[17094] = False; +row_to_keep[20586] = False; row_to_keep[87228] = False; row_to_keep[87229] = False; +row_to_keep[16744] = False; row_to_keep[87905] = False; row_to_keep[87906] = False; +row_to_keep[16669] = False; row_to_keep[16670] = False; row_to_keep[16719] = False; +row_to_keep[87515] = False; row_to_keep[20090] = False; row_to_keep[31748] = False; for i in range(len(original_corpus_text)): transcript = original_corpus_text[i].strip() if row_to_keep[i]: diff --git a/egs/iam/v2/local/score.sh b/egs/iam/v2/local/score.sh index b2032909333..1d84815fc69 100755 --- a/egs/iam/v2/local/score.sh +++ b/egs/iam/v2/local/score.sh @@ -1,155 +1,6 @@ -#!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal) -# Apache 2.0 - -# This script is like steps/scoring/score_kaldi_wer.sh except it transcribes the 's -# using 
local/unk_arc_post_to_transcription.py and also it calls -# steps/scoring/score_kaldi_cer.sh at the end. - -[ -f ./path.sh ] && . ./path.sh - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=false -stats=true -beam=6 -word_ins_penalty=0.0,0.5,1.0 -min_lmwt=3 -max_lmwt=13 -iter=final -#end configuration section. - -echo "$0 $@" # Print the command line for logging -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: $0 [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang_or_graph=$2 -dir=$3 -model_path=`echo $dir |xargs dirname` -symtab=$lang_or_graph/words.txt - -for f in $symtab $dir/lat.1.gz $data/text; do - [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; -done - - -ref_filtering_cmd="cat" -[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter" -[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter" -hyp_filtering_cmd="cat" -[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter" -[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter" - - -if $decode_mbr ; then - echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty" -else - echo "$0: scoring with word insertion penalty=$word_ins_penalty" -fi - - -mkdir -p $dir/scoring_kaldi -cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; -if [ $stage -le 0 ]; then - - for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do - mkdir -p $dir/scoring_kaldi/penalty_$wip/log - - if $decode_mbr ; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ - acwt=\`perl -e \"print 1.0/LMWT\"\`\; \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-mbr-decode --word-symbol-table=$symtab \ - ark:- ark,t:- \| \ - utils/int2sym.pl -f 2- $symtab \| \ - $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; - else - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \ - utils/int2sym.pl -f 2- $symtab \| \ - $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; - fi - - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ - cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ - compute-wer --text --mode=present \ - "ark:cat $dir/scoring_kaldi/test_filt.txt |" ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; - - done -fi - - - -if [ $stage -le 1 ]; then - - for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do - for lmwt in $(seq $min_lmwt $max_lmwt); do - # adding /dev/null to the command list below forces grep to output the filename - grep WER $dir/wer_${lmwt}_${wip} /dev/null - done - done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 - - best_wer_file=$(awk '{print $NF}' 
$dir/scoring_kaldi/best_wer) - best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') - best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') - - if [ -z "$best_lmwt" ]; then - echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." - exit 1; - fi - - if $stats; then - mkdir -p $dir/scoring_kaldi/wer_details - echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight - echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty - - $cmd $dir/scoring_kaldi/log/stats1.log \ - cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ - align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ - utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ - utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; - - $cmd $dir/scoring_kaldi/log/stats2.log \ - cat $dir/scoring_kaldi/wer_details/per_utt \| \ - utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ - sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; - - $cmd $dir/scoring_kaldi/log/wer_bootci.log \ - compute-wer-bootci --mode=present \ - ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ - '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; - - fi -fi - -steps/scoring/score_kaldi_cer.sh --cmd "$cmd" --stage 2 --min-lmwt $min_lmwt \ - --max-lmwt $max_lmwt --word-ins-penalty $word_ins_penalty \ - $data $lang_or_graph $dir +#!/bin/bash -# If we got here, the scoring was successful. -# As a small aid to prevent confusion, we remove all wer_{?,??} files; -# these originate from the previous version of the scoring files -# i keep both statement here because it could lead to confusion about -# the capabilities of the script (we don't do cer in the script) -rm $dir/wer_{?,??} 2>/dev/null -rm $dir/cer_{?,??} 2>/dev/null -exit 0; +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/iam/v2/local/train_lm.sh b/egs/iam/v2/local/train_lm.sh index 35eb56b1341..cc0119eb748 100755 --- a/egs/iam/v2/local/train_lm.sh +++ b/egs/iam/v2/local/train_lm.sh @@ -64,22 +64,22 @@ if [ $stage -le 0 ]; then > data/local/lob-train-only.txt fi cat data/local/lob-train-only.txt | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > ${dir}/data/text/lob.txt cat data/local/browncorpus/brown.txt | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > ${dir}/data/text/brown.txt + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > ${dir}/brown.txt + tail -n +5000 ${dir}/brown.txt > ${dir}/data/text/brown.txt if [ -d "data/local/wellingtoncorpus" ]; then cat data/local/wellingtoncorpus/Wellington_annotation_removed.txt | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > ${dir}/data/text/wellington.txt fi # use the validation data as the dev set. # Note: the name 'dev' is treated specially by pocolm, it automatically # becomes the dev set. 
- - cat data/val/text | cut -d " " -f 2- > ${dir}/data/text/dev.txt + head -5000 ${dir}/brown.txt > ${dir}/data/text/dev.txt # use the training data as an additional data source. # we can later fold the dev data into this. diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index de5c7086ec2..c515c85fc72 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -6,6 +6,8 @@ stage=0 nj=20 username= password= +process_aachen_split=false +overwrite=false # iam_database points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # like "data/download" and follow the instructions @@ -27,56 +29,96 @@ wellington_database=/export/corpora5/Wellington/WWC/ ./local/check_tools.sh if [ $stage -le 0 ]; then + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing; the script was probably run from the wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ - --username "$username" --password "$password" + --username "$username" --password "$password" \ + --process_aachen_split $process_aachen_split fi -mkdir -p data/{train,test}/data +mkdir -p data/{train,test}/data if [ $stage -le 1 ]; then - image/get_image2num_frames.py data/train # This will be needed for the next command + echo "$(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - echo "$0: Preparing the test and train feature files..." - for dataset in train test; do - local/make_features.py data/$dataset --feat-dim 40 | \ - copy-feats --compress=true --compression-method=7 \ - ark:- ark,scp:data/$dataset/data/images.ark,data/$dataset/feats.scp - steps/compute_cmvn_stats.sh data/$dataset + echo "$(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train + steps/compute_cmvn_stats.sh data/train || exit 1; + for set in val test; do + local/extract_features.sh --nj $nj --cmd "$cmd" --augment true \ + --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; done utils/fix_data_dir.sh data/train fi if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation; this will double the training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; + done +fi + +if [ $stage -le 3 ]; then echo "$0: Preparing BPE..." + # Get the non-silence phones (characters) from the training text.
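Note (illustrative, not part of the patch): the inline python3 heredoc in the hunk below collects the character inventory used as "non-silence phones" in this character/BPE setup. The same idea as a standalone filter, reading text on stdin and writing one character per line on stdout (the script name in the usage line is hypothetical; the recipe keeps this logic inline):

#!/usr/bin/env python3
# Collect the set of characters seen in the input text, one per output line.
import io
import sys

infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
chars = dict()  # dict keys keep first-seen order (Python 3.7+), mirroring the heredoc
for line in infile:
    for word in line.strip().split():
        for char in word:
            chars[char] = True
for char in chars:
    output.write(char + '\n')

Usage would be analogous to the heredoc, e.g. cut -d' ' -f2- data/train/text | local/collect_chars.py > data/local/phones.txt.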
cut -d' ' -f2- data/train/text | \ - local/prepend_words.py | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/phones.txt + + cut -d' ' -f2- data/train/text > data/local/train_data.txt + cat data/local/phones.txt data/local/train_data.txt | \ + utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - for set in test train val; do + for set in test train val train_aug; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text done fi -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh fi -if [ $stage -le 4 ]; then +if [ $stage -le 5 ]; then echo "$0: Preparing dictionary and lang..." local/prepare_dict.sh # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang + silphonelist=`cat data/lang/phones/silence.csl` + nonsilphonelist=`cat data/lang/phones/nonsilence.csl` + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ @@ -85,20 +127,20 @@ if [ $stage -le 4 ]; then data/lang data/lang_rescore_6g fi -if [ $stage -le 5 ]; then +if [ $stage -le 6 ]; then echo "$0: Calling the flat-start chain recipe..." - local/chain/run_e2e_cnn.sh + local/chain/run_e2e_cnn.sh --train_set train_aug fi -if [ $stage -le 6 ]; then +if [ $stage -le 7 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train + data/train_aug data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train fi -if [ $stage -le 7 ]; then +if [ $stage -le 8 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali.sh + local/chain/run_cnn_e2eali.sh --train_set train_aug fi diff --git a/egs/uw3/v1/local/unk_arc_post_to_transcription.py b/egs/uw3/v1/local/unk_arc_post_to_transcription.py index c86d35e4b8a..f8b69820601 100755 --- a/egs/uw3/v1/local/unk_arc_post_to_transcription.py +++ b/egs/uw3/v1/local/unk_arc_post_to_transcription.py @@ -1,86 +1,107 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -# Copyright 2017 Ashish Arora +#Copyright 2017 Ashish Arora +""" This module will be used by scripts for open vocabulary setup. 
+ If the hypothesis transcription contains , then it will replace the + with the word predicted by model by concatenating phones decoded + from the unk-model. It is currently supported only for triphone setup. + Args: + phones: File name of a file that contains the phones.txt, (symbol-table for phones). + phone and phoneID, Eg. a 217, phoneID of 'a' is 217. + words: File name of a file that contains the words.txt, (symbol-table for words). + word and wordID. Eg. ACCOUNTANCY 234, wordID of 'ACCOUNTANCY' is 234. + unk: ID of . Eg. 231. + one-best-arc-post: A file in arc-post format, which is a list of timing info and posterior + of arcs along the one-best path from the lattice. + E.g. 506_m01-049-00 8 12 1 7722 282 272 288 231 + [] + [ ...] + output-text: File containing hypothesis transcription with recognized by the + unk-model. + E.g. A move to stop mr. gaitskell. + + Eg. local/unk_arc_post_to_transcription.py lang/phones.txt lang/words.txt + data/lang/oov.int +""" import argparse +import os import sys - parser = argparse.ArgumentParser(description="""uses phones to convert unk to word""") -parser.add_argument('phones', type=str, help='phones and phonesID') -parser.add_argument('words', type=str, help='word and wordID') -parser.add_argument('unk', type=str, default='-', help='location of unk file') -parser.add_argument('--input-ark', type=str, default='-', help='where to read the input data') -parser.add_argument('--out-ark', type=str, default='-', help='where to write the output data') +parser.add_argument('phones', type=str, help='File name of a file that contains the' + 'symbol-table for phones. Each line must be: ') +parser.add_argument('words', type=str, help='File name of a file that contains the' + 'symbol-table for words. Each line must be: ') +parser.add_argument('unk', type=str, default='-', help='File name of a file that' + 'contains the ID of . The content must be: , e.g. 
231') +parser.add_argument('--one-best-arc-post', type=str, default='-', help='A file in arc-post' + 'format, which is a list of timing info and posterior of arcs' + 'along the one-best path from the lattice') +parser.add_argument('--output-text', type=str, default='-', help='File containing' + 'hypothesis transcription with recognized by the unk-model') args = parser.parse_args() + ### main ### -phone_fh = open(args.phones, 'r') -word_fh = open(args.words, 'r') -unk_fh = open(args.unk,'r') -if args.input_ark == '-': - input_fh = sys.stdin +phone_handle = open(args.phones, 'r', encoding='latin-1') # Create file handles +word_handle = open(args.words, 'r', encoding='latin-1') +unk_handle = open(args.unk,'r', encoding='latin-1') +if args.one_best_arc_post == '-': + arc_post_handle = sys.stdin else: - input_fh = open(args.input_ark,'r') -if args.out_ark == '-': - out_fh = sys.stdout + arc_post_handle = open(args.one_best_arc_post, 'r', encoding='latin-1') +if args.output_text == '-': + output_text_handle = sys.stdout else: - out_fh = open(args.out_ark,'wb') + output_text_handle = open(args.output_text, 'w', encoding='latin-1') -phone_dict = dict()# stores phoneID and phone mapping -phone_data_vect = phone_fh.read().strip().split("\n") -for key_val in phone_data_vect: +id2phone = dict() # Stores the mapping from phone_id (int) to phone (char) +phones_data = phone_handle.read().strip().split("\n") + +for key_val in phones_data: key_val = key_val.split(" ") - phone_dict[key_val[1]] = key_val[0] + id2phone[key_val[1]] = key_val[0] + word_dict = dict() -word_data_vect = word_fh.read().strip().split("\n") +word_data_vect = word_handle.read().strip().split("\n") + for key_val in word_data_vect: key_val = key_val.split(" ") word_dict[key_val[1]] = key_val[0] -unk_val = unk_fh.read().strip().split(" ")[0] +unk_val = unk_handle.read().strip().split(" ")[0] -utt_word_dict = dict() -utt_phone_dict = dict()# stores utteranceID and phoneID -unk_word_dict = dict() -count=0 -for line in input_fh: +utt_word_dict = dict() # Dict of list, stores mapping from utteranceID(int) to words(str) +for line in arc_post_handle: line_vect = line.strip().split("\t") - if len(line_vect) < 6: - print "IndexError" - print line_vect + if len(line_vect) < 6: # Check for 1best-arc-post output + print("Error: Bad line: '{}' Expecting 6 fields. Skipping...".format(line), + file=sys.stderr) continue - uttID = line_vect[0] + utt_id = line_vect[0] word = line_vect[4] phones = line_vect[5] - if uttID in utt_word_dict.keys(): - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - else: - count = 0 - utt_word_dict[uttID] = dict() - utt_phone_dict[uttID] = dict() - utt_word_dict[uttID][count] = word - utt_phone_dict[uttID][count] = phones - if word == unk_val: # get character sequence for unk - phone_key_vect = phones.split(" ") - phone_val_vect = list() - for pkey in phone_key_vect: - phone_val_vect.append(phone_dict[pkey]) + if utt_id not in list(utt_word_dict.keys()): + utt_word_dict[utt_id] = list() + + if word == unk_val: # Get the 1best phone sequence given by the unk-model + phone_id_seq = phones.split(" ") + phone_seq = list() + for pkey in phone_id_seq: + phone_seq.append(id2phone[pkey]) # Convert the phone-id sequence to a phone sequence. 
phone_2_word = list() - for phone_val in phone_val_vect: - phone_2_word.append(phone_val.split('_')[0]) - phone_2_word = ''.join(phone_2_word) - utt_word_dict[uttID][count] = phone_2_word + for phone_val in phone_seq: + phone_2_word.append(phone_val.split('_')[0]) # Removing the word-position markers (e.g. _B) + phone_2_word = ''.join(phone_2_word) # Concatenate phone sequence + utt_word_dict[utt_id].append(phone_2_word) # Store word from unk-model else: - if word == '0': + if word == '0': # Store space/silence word_val = ' ' else: word_val = word_dict[word] - utt_word_dict[uttID][count] = word_val - count += 1 + utt_word_dict[utt_id].append(word_val) # Store word from 1best-arc-post -transcription = "" -for key in sorted(utt_word_dict.iterkeys()): - transcription = key - for index in sorted(utt_word_dict[key].iterkeys()): - value = utt_word_dict[key][index] - transcription = transcription + " " + value - out_fh.write(transcription + '\n') +transcription = "" # Output transcription +for utt_key in sorted(utt_word_dict.keys()): + transcription = utt_key + for word in utt_word_dict[utt_key]: + transcription = transcription + " " + word + output_text_handle.write(transcription + '\n')
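Note (illustrative, not part of the patch): a toy, self-contained example of the <unk>-replacement logic implemented above; the phone symbol table and IDs are invented, and real tables come from phones.txt / words.txt:

# Reconstruct a word from a phone-id sequence, as unk_arc_post_to_transcription.py does.
id2phone = {'10': 'm_B', '11': 'r_I', '12': '._E'}   # invented ids for illustration
phone_id_seq = '10 11 12'.split(' ')                 # the arc-post phone field, split on spaces
phone_seq = [id2phone[pid] for pid in phone_id_seq]  # -> ['m_B', 'r_I', '._E']
word = ''.join(p.split('_')[0] for p in phone_seq)   # strip word-position markers, concatenate
print(word)                                          # -> mr.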