Adding the reverberation script to AMI (kaldi-asr#1178)
Added a reverberation-based data augmentation recipe for AMI. Gives gains in the IHM, SDM and MDM settings. (TDNN + chain recipe checked in.)
1 parent 49eb733 · commit a2f5464
Showing 5 changed files with 553 additions and 2 deletions.
egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh (+283 lines):
#!/bin/bash

# This is a chain-training script with TDNN neural networks.
# This script is based on local/chain/run_tdnn.sh, but adds the
# reverberated IHM data to the training set.
# This script obtains better results on both the IHM and SDM tasks.

# Please see RESULTS_* for examples of command lines invoking this script.

# local/chain/multi_condition/run_tdnn.sh --mic ihm --train-set train_cleaned --gmm tri3_cleaned &
# local/chain/multi_condition/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned &
# local/chain/multi_condition/run_tdnn.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned &

set -e -o pipefail

# First the options that are passed through to run_ivector_common.sh
# (some of which are also used in this script directly).
stage=1
mic=ihm
nj=30
min_seg_len=1.55
use_ihm_ali=true
train_set=train_cleaned
gmm=tri3_cleaned      # the gmm for the target data
ihm_gmm=tri3_cleaned  # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
num_data_reps=1

# The rest are configs specific to this script. Most of the parameters
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix=      # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
tdnn_affix=      # affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir=  # you can set this to use previously dumped egs.

# End configuration section.
echo "$0 $@"  # Print the command line for logging.

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! $use_ihm_ali; then
  [ "$mic" != "ihm" ] && \
    echo "$0: you cannot specify --use-ihm-ali false if the microphone is not ihm." && \
    exit 1;
else
  [ "$mic" == "ihm" ] && \
    echo "$0: you must specify --use-ihm-ali false if the microphone is ihm." && \
    exit 1;
fi
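
# In other words: the ihm microphone uses its own alignments, while the
# distant mics (sdm*/mdm*) are expected to borrow the cleaner IHM alignments
# via --use-ihm-ali true, as in the example commands at the top of this script.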

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

nnet3_affix=_cleaned
rvb_affix=_rvb

if $use_ihm_ali; then
  gmm_dir=exp/ihm/${ihm_gmm}
  ali_dir=exp/${mic}/${ihm_gmm}_ali_${train_set}_sp_comb_ihmdata
  lores_train_data_dir=data/$mic/${train_set}_ihmdata_sp_comb
  tree_dir=exp/$mic/chain${nnet3_affix}/tree_bi${tree_affix}_ihmdata
  original_lat_dir=exp/$mic/chain${nnet3_affix}/${ihm_gmm}_${train_set}_sp_comb_lats_ihmdata
  lat_dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/${ihm_gmm}_${train_set}_sp${rvb_affix}_comb_lats_ihmdata
  dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/tdnn${tdnn_affix}_sp${rvb_affix}_bi_ihmali
  maybe_ihm="IHM "  # used only in the logging message in stage 11 below.
  # note: the distinction between when we use the 'ihmdata' suffix versus
  # 'ihmali' is pretty arbitrary.
else
  gmm_dir=exp/${mic}/$gmm
  ali_dir=exp/${mic}/${gmm}_ali_${train_set}_sp_comb
  lores_train_data_dir=data/$mic/${train_set}_sp_comb
  tree_dir=exp/$mic/chain${nnet3_affix}/tree_bi${tree_affix}
  original_lat_dir=exp/$mic/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
  lat_dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/${gmm}_${train_set}_sp${rvb_affix}_comb_lats
  dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/tdnn${tdnn_affix}_sp${rvb_affix}_bi
  maybe_ihm=
fi
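
# For illustration only (assuming --mic sdm1 --use-ihm-ali true and the
# default affixes), the first branch above expands to paths like:
#   gmm_dir=exp/ihm/tri3_cleaned
#   lat_dir=exp/sdm1/chain_cleaned_rvb/tri3_cleaned_train_cleaned_sp_rvb_comb_lats_ihmdata
#   dir=exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali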

local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \
  --mic $mic \
  --nj $nj \
  --min-seg-len $min_seg_len \
  --train-set $train_set \
  --gmm $gmm \
  --num-threads-ubm $num_threads_ubm \
  --num-data-reps $num_data_reps \
  --nnet3-affix "$nnet3_affix"
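
# The script above prepares the speed-perturbed and reverberated hi-res
# features and the corresponding i-vectors, which are picked up below via
# $train_data_dir and $train_ivector_dir.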

# Note: the first stage of the following script is stage 8.
local/nnet3/prepare_lores_feats.sh --stage $stage \
  --mic $mic \
  --nj $nj \
  --min-seg-len $min_seg_len \
  --use-ihm-ali $use_ihm_ali \
  --train-set $train_set

train_data_dir=data/$mic/${train_set}_sp${rvb_affix}_hires_comb
train_ivector_dir=exp/$mic/nnet3${nnet3_affix}${rvb_affix}/ivectors_${train_set}_sp${rvb_affix}_hires_comb
final_lm=$(cat data/local/lm/final_lm)
LM=$final_lm.pr1-7
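# $LM names the pruned ('.pr1-7') variant of the final LM; the decoding graph
# in the mkgraph stage below is built from data/lang_${LM}.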

for f in $gmm_dir/final.mdl $lores_train_data_dir/feats.scp \
    $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp; do
  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done

if [ $stage -le 11 ]; then
  if [ -f $ali_dir/ali.1.gz ]; then
    echo "$0: alignments in $ali_dir appear to already exist. Please either remove them"
    echo " ... or use a later --stage option."
    exit 1
  fi
  echo "$0: aligning perturbed, short-segment-combined ${maybe_ihm}data"
  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
    ${lores_train_data_dir} data/lang $gmm_dir $ali_dir
fi

[ ! -f $ali_dir/ali.1.gz ] && echo "$0: expected $ali_dir/ali.1.gz to exist" && exit 1

if [ $stage -le 12 ]; then
  echo "$0: creating lang directory with one state per phone."
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  if [ -d data/lang_chain ]; then
    if [ data/lang_chain/L.fst -nt data/lang/L.fst ]; then
      echo "$0: data/lang_chain already exists, not overwriting it; continuing"
    else
      echo "$0: data/lang_chain already exists and seems to be older than data/lang..."
      echo " ... not sure what to do. Exiting."
      exit 1;
    fi
  else
    cp -r data/lang data/lang_chain
    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
    # Use our special topology... note that later on we may have to tune this
    # topology.
    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
  fi
fi
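
# (It is this reduced topology, which can be traversed in a single frame,
# that allows the tree to be built and the chain model to be trained at the
# reduced frame rate, i.e. --frame-subsampling-factor 3 below.)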

if [ $stage -le 13 ]; then
  # Get the alignments as lattices (gives the chain training more freedom).
  # Note: this uses a fixed 100 jobs rather than $nj.
  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
    data/lang $gmm_dir $original_lat_dir
  rm $original_lat_dir/fsts.*.gz  # save space

  lat_dir_ihmdata=exp/ihm/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats

  mkdir -p $lat_dir/temp/
  mkdir -p $lat_dir/temp2/
  lattice-copy "ark:gunzip -c $original_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp
  lattice-copy "ark:gunzip -c $lat_dir_ihmdata/lat.*.gz |" ark,scp:$lat_dir/temp2/lats.ark,$lat_dir/temp2/lats.scp

  # Copy the lattices for the reverberated data.
  rm -f $lat_dir/temp/combined_lats.scp
  touch $lat_dir/temp/combined_lats.scp
  cat $lat_dir/temp/lats.scp >> $lat_dir/temp/combined_lats.scp
  for i in $(seq 1 $num_data_reps); do
    sed -e "s/^/rev${i}_/" $lat_dir/temp2/lats.scp >> $lat_dir/temp/combined_lats.scp
  done
  sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp
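
  # For illustration (hypothetical utterance ID): with num_data_reps=1, the
  # sed command above turns an IHM lattice entry such as
  #   AMI_EN2001a_H00_MEE068_0000557_0000594 $lat_dir/temp2/lats.ark:12345
  # into
  #   rev1_AMI_EN2001a_H00_MEE068_0000557_0000594 $lat_dir/temp2/lats.ark:12345
  # matching the "rev1_" prefix given to the utterance IDs of the
  # reverberated copies of the IHM data.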

  lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1;
  echo "1" > $lat_dir/num_jobs

  # Copy the other files from the original lattice dir.
  for f in cmvn_opts final.mdl splice_opts tree; do
    cp $original_lat_dir/$f $lat_dir/$f
  done
fi

if [ $stage -le 14 ]; then
  # Build a tree using our new topology. We know we have alignments for the
  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
  # those.
  if [ -f $tree_dir/final.mdl ]; then
    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
    exit 1;
  fi
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
    --context-opts "--context-width=2 --central-position=1" \
    --leftmost-questions-truncate -1 \
    --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
fi

if [ $stage -le 15 ]; then
  mkdir -p $dir

  echo "$0: creating neural net configs"

  steps/nnet3/tdnn/make_configs.py \
    --self-repair-scale-nonlinearity 0.00001 \
    --feat-dir data/$mic/${train_set}_sp_hires_comb \
    --ivector-dir $train_ivector_dir \
    --tree-dir $tree_dir \
    --relu-dim 450 \
    --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \
    --use-presoftmax-prior-scale false \
    --xent-regularize 0.1 \
    --xent-separate-forward-affine true \
    --include-log-softmax false \
    --final-layer-normalize-target 1.0 \
    $dir/configs || exit 1;
fi
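
# Note on the splice indexes above: the per-layer offsets accumulate, so each
# output frame effectively sees about 17 frames of left context (1+1+3+3+3+6)
# and 12 frames of right context (1+2+3+3+3) at the input.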

if [ $stage -le 16 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-rvb$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage
  fi

  mkdir -p $dir/egs
  touch $dir/egs/.nodelete  # keep egs around when the run dies.

  steps/nnet3/chain/train.py --stage $train_stage \
    --cmd "$decode_cmd" \
    --feat.online-ivector-dir $train_ivector_dir \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize 0.1 \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.00005 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=2000" \
    --egs.dir "$common_egs_dir" \
    --egs.opts "--frames-overlap-per-eg 0" \
    --egs.chunk-width 150 \
    --trainer.num-chunk-per-minibatch 128 \
    --trainer.frames-per-iter 1500000 \
    --trainer.num-epochs 4 \
    --trainer.optimization.num-jobs-initial 2 \
    --trainer.optimization.num-jobs-final 12 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.max-param-change 2.0 \
    --cleanup.remove-egs true \
    --feat-dir $train_data_dir \
    --tree-dir $tree_dir \
    --lat-dir $lat_dir \
    --dir $dir
fi
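
# Note on the schedule: train.py treats the "effective" learning rates as
# per-job values, so the actual learning rate is the effective rate times the
# current number of jobs, which here grows from 2 to 12 while the effective
# rate decays from 0.001 to 0.0001 over the 4 epochs.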

graph_dir=$dir/graph_${LM}
if [ $stage -le 17 ]; then
  # Note: it might appear that this data/lang_chain directory is mismatched, and it is,
  # as far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
fi

if [ $stage -le 18 ]; then
  rm $dir/.error 2>/dev/null || true
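  # Note: --acwt 1.0 --post-decode-acwt 10.0 is the usual setting for chain
  # models: decoding runs at the chain model's acoustic scale of 1.0, and the
  # lattice scores are then scaled up by 10 so that the usual integer range of
  # language-model weights applies at scoring time.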
  for decode_set in dev eval; do
    (
      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
        --nj $nj --cmd "$decode_cmd" \
        --online-ivector-dir exp/$mic/nnet3${nnet3_affix}${rvb_affix}/ivectors_${decode_set}_hires \
        --scoring-opts "--min-lmwt 5" \
        $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1;
    ) || touch $dir/.error &
  done
  wait
  if [ -f $dir/.error ]; then
    echo "$0: something went wrong in decoding"
    exit 1
  fi
fi
exit 0