Adding the reverberation script to AMI (kaldi-asr#1178)

Added a reverberation-based data augmentation recipe for AMI. It gives gains in the IHM, SDM and MDM settings (TDNN + chain recipe checked in).
tomkocse authored and vijayaditya committed Nov 12, 2016
1 parent 49eb733 commit a2f5464
Showing 5 changed files with 553 additions and 2 deletions.
egs/ami/s5b/RESULTS_ihm (7 additions, 0 deletions)
@@ -62,3 +62,10 @@
 for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
 %WER 22.4 | 13098 94476 | 80.4 10.4 9.2 2.8 22.4 54.6 | 0.069 | exp/ihm/chain/tdnn_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys
 %WER 22.5 | 12643 89974 | 80.0 12.1 7.9 2.6 22.5 52.8 | 0.157 | exp/ihm/chain/tdnn_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys
+
+# local/chain/multi_condition/run_tdnn.sh --mic ihm &
+# cleanup + chain TDNN model + IHM reverberated data
+# for d in exp/ihm/chain_cleaned_rvb/tdnn_sp_rvb_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 21.5 | 13098 94486 | 81.8 11.0 7.2 3.3 21.5 54.6 | 0.090 | exp/ihm/chain_cleaned_rvb/tdnn_sp_rvb_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys
+%WER 21.9 | 12643 89985 | 80.8 12.3 6.9 2.7 21.9 52.5 | 0.183 | exp/ihm/chain_cleaned_rvb/tdnn_sp_rvb_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys
+
egs/ami/s5b/RESULTS_mdm (6 additions, 1 deletion)
@@ -65,7 +65,6 @@

 # cleanup + chain TDNN model, alignments from IHM data (IHM alignments help).
 # local/chain/run_tdnn.sh --mic mdm8 --use-ihm-ali true --stage 12 &
-# *** best system ***
 # for d in exp/mdm8/chain_cleaned/tdnn_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
 %WER 37.4 | 15286 94509 | 66.6 18.0 15.5 3.9 37.4 62.8 | 0.624 | exp/mdm8/chain_cleaned/tdnn_sp_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
 %WER 40.6 | 13381 89982 | 62.7 18.9 18.3 3.3 40.6 67.6 | 0.594 | exp/mdm8/chain_cleaned/tdnn_sp_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys
@@ -78,4 +77,10 @@
 %WER 37.9 | 15635 94514 | 66.5 19.1 14.4 4.4 37.9 61.2 | 0.646 | exp/mdm8/chain/tdnn_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys
 %WER 41.5 | 13884 89975 | 62.3 20.3 17.4 3.8 41.5 66.0 | 0.621 | exp/mdm8/chain/tdnn_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys

+# local/chain/multi_condition/run_tdnn.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned
+# cleanup + chain TDNN model, MDM original + IHM reverberated data, alignments from IHM data
+# *** best system ***
+# for d in exp/mdm8/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 35.8 | 14512 94498 | 68.2 17.2 14.6 4.0 35.8 64.9 | 0.632 | exp/mdm8/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
+%WER 39.1 | 13651 89967 | 64.3 18.4 17.3 3.3 39.1 65.2 | 0.607 | exp/mdm8/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys

egs/ami/s5b/RESULTS_sdm (8 additions, 1 deletion)
@@ -62,7 +62,6 @@
 # cleanup + chain TDNN model, alignments from IHM data (IHM alignments help).
 # local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --stage 12 &
 # cleanup + chain TDNN model, cleaned data and alignments from ihm data.
-# *** best system ***
 # for d in exp/sdm1/chain_cleaned/tdnn_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
 %WER 40.7 | 14321 94501 | 63.0 19.6 17.4 3.7 40.7 67.7 | 0.592 | exp/sdm1/chain_cleaned/tdnn_sp_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
 %WER 44.8 | 14293 89976 | 58.6 21.3 20.1 3.3 44.8 64.2 | 0.559 | exp/sdm1/chain_cleaned/tdnn_sp_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys
@@ -75,4 +74,12 @@
 %WER 40.7 | 14549 94520 | 63.6 21.4 15.0 4.3 40.7 66.2 | 0.617 | exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys
 %WER 45.1 | 13296 89971 | 59.1 23.4 17.6 4.2 45.1 69.5 | 0.591 | exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys

+# local/chain/multi_condition/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned &
+# cleanup + chain TDNN model, SDM original + IHM reverberated data, alignments from ihm data.
+# *** best system ***
+# for d in exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 38.6 | 14760 94502 | 65.3 19.3 15.4 3.9 38.6 64.9 | 0.599 | exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
+%WER 42.7 | 14070 89982 | 60.9 21.0 18.0 3.6 42.7 64.5 | 0.571 | exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys
+
+
egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh (new file, 283 additions)
@@ -0,0 +1,283 @@
#!/bin/bash

# This is a chain training script with TDNN neural networks.
# It is based on local/chain/run_tdnn.sh, but adds the reverberated
# IHM data to the training set.
# This gives better results on the IHM, SDM and MDM tasks.

# Please see RESULTS_* for examples of command lines invoking this script.

# local/chain/multi_condition/run_tdnn.sh --mic ihm --train-set train_cleaned --gmm tri3_cleaned &
# local/chain/multi_condition/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned &
# local/chain/multi_condition/run_tdnn.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned &


set -e -o pipefail

# First the options that are passed through to run_ivector_common.sh
# (some of which are also used in this script directly).
stage=1
mic=ihm
nj=30
min_seg_len=1.55
use_ihm_ali=false # must be true for sdm/mdm (we align with the IHM data) and
                  # false for ihm; see the check below. The example command
                  # lines in RESULTS_* rely on this default for the ihm case.
train_set=train_cleaned
gmm=tri3_cleaned # the gmm for the target data
ihm_gmm=tri3_cleaned # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
num_data_reps=1

# The rest are configs specific to this script. Most of the parameters
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
tdnn_affix=  # affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.

# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! $use_ihm_ali; then
[ "$mic" != "ihm" ] && \
echo "$0: you cannot specify --use-ihm-ali false if the microphone is not ihm." && \
exit 1;
else
[ "$mic" == "ihm" ] && \
echo "$0: you must specify --use-ihm-ali false if the microphone is ihm." && \
exit 1;
fi

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

nnet3_affix=_cleaned
rvb_affix=_rvb


if $use_ihm_ali; then
gmm_dir=exp/ihm/${ihm_gmm}
ali_dir=exp/${mic}/${ihm_gmm}_ali_${train_set}_sp_comb_ihmdata
lores_train_data_dir=data/$mic/${train_set}_ihmdata_sp_comb
tree_dir=exp/$mic/chain${nnet3_affix}/tree_bi${tree_affix}_ihmdata
original_lat_dir=exp/$mic/chain${nnet3_affix}/${ihm_gmm}_${train_set}_sp_comb_lats_ihmdata
lat_dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/${ihm_gmm}_${train_set}_sp${rvb_affix}_comb_lats_ihmdata
dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/tdnn${tdnn_affix}_sp${rvb_affix}_bi_ihmali
maybe_ihm="IHM " # used only in a log message below.
# note: the distinction between when we use the 'ihmdata' suffix versus
# 'ihmali' is pretty arbitrary.
else
gmm_dir=exp/${mic}/$gmm
ali_dir=exp/${mic}/${gmm}_ali_${train_set}_sp_comb
lores_train_data_dir=data/$mic/${train_set}_sp_comb
tree_dir=exp/$mic/chain${nnet3_affix}/tree_bi${tree_affix}
original_lat_dir=exp/$mic/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
lat_dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/${gmm}_${train_set}_sp${rvb_affix}_comb_lats
dir=exp/$mic/chain${nnet3_affix}${rvb_affix}/tdnn${tdnn_affix}_sp${rvb_affix}_bi
maybe_ihm=
fi
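
# For example, with the defaults above and "--mic sdm1 --use-ihm-ali true",
# this resolves to dir=exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali,
# the directory quoted in RESULTS_sdm.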


local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \
--mic $mic \
--nj $nj \
--min-seg-len $min_seg_len \
--train-set $train_set \
--gmm $gmm \
--num-threads-ubm $num_threads_ubm \
--num-data-reps $num_data_reps \
--nnet3-affix "$nnet3_affix"
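
# The reverberated copies of the IHM data are created inside
# local/nnet3/multi_condition/run_ivector_common.sh, before feature
# extraction. A rough sketch of that step (illustrative only; the actual
# RIR lists, option values and data-dir names live in that script):
#
#   steps/data/reverberate_data_dir.py \
#     --prefix "rev" \
#     --speech-rvb-probability 1 \
#     --num-replications $num_data_reps \
#     --source-sampling-rate 16000 \
#     data/ihm/${train_set}_sp data/ihm/${train_set}_sp_rvb
#
# Each replica's utterance ids get a "rev${i}_" prefix, which is why the
# lattice-combination stage below applies the same prefix to the IHM
# lattices.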


# Note: the first stage of the following script is stage 8.
local/nnet3/prepare_lores_feats.sh --stage $stage \
--mic $mic \
--nj $nj \
--min-seg-len $min_seg_len \
--use-ihm-ali $use_ihm_ali \
--train-set $train_set


train_data_dir=data/$mic/${train_set}_sp${rvb_affix}_hires_comb
train_ivector_dir=exp/$mic/nnet3${nnet3_affix}${rvb_affix}/ivectors_${train_set}_sp${rvb_affix}_hires_comb
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7


for f in $gmm_dir/final.mdl $lores_train_data_dir/feats.scp \
$train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done


if [ $stage -le 11 ]; then
if [ -f $ali_dir/ali.1.gz ]; then
echo "$0: alignments in $ali_dir appear to already exist. Please either remove them "
echo " ... or use a later --stage option."
exit 1
fi
echo "$0: aligning perturbed, short-segment-combined ${maybe_ihm}data"
steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
${lores_train_data_dir} data/lang $gmm_dir $ali_dir
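# (This aligns the low-resolution features in $lores_train_data_dir, since
# the GMM in $gmm_dir was trained on that feature type.)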
fi

[ ! -f $ali_dir/ali.1.gz ] && echo "$0: expected $ali_dir/ali.1.gz to exist" && exit 1

if [ $stage -le 12 ]; then
echo "$0: creating lang directory with one state per phone."
# Create a version of the lang/ directory that has one state per phone in the
# topo file. [note, it really has two states.. the first one is only repeated
# once, the second one has zero or more repeats.]
if [ -d data/lang_chain ]; then
if [ data/lang_chain/L.fst -nt data/lang/L.fst ]; then
echo "$0: data/lang_chain already exists, not overwriting it; continuing"
else
echo "$0: data/lang_chain already exists and seems to be older than data/lang..."
echo " ... not sure what to do. Exiting."
exit 1;
fi
else
cp -r data/lang data/lang_chain
silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on we may have to tune this
# topology.
steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
fi
fi

if [ $stage -le 13 ]; then
# Get the alignments as lattices (this gives the chain training more freedom).
steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
data/lang $gmm_dir $original_lat_dir
rm $original_lat_dir/fsts.*.gz # save space

lat_dir_ihmdata=exp/ihm/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats

mkdir -p $lat_dir/temp/
mkdir -p $lat_dir/temp2/
lattice-copy "ark:gunzip -c $original_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp
lattice-copy "ark:gunzip -c $lat_dir_ihmdata/lat.*.gz |" ark,scp:$lat_dir/temp2/lats.ark,$lat_dir/temp2/lats.scp

# copy the lattices for the reverberated data
rm -f $lat_dir/temp/combined_lats.scp
touch $lat_dir/temp/combined_lats.scp
cat $lat_dir/temp/lats.scp >> $lat_dir/temp/combined_lats.scp
for i in `seq 1 $num_data_reps`; do
cat $lat_dir/temp2/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp
done
sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp
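
# combined_lats_sorted.scp now has one entry per original utterance plus one
# per reverberated copy, e.g. (hypothetical utterance ids):
#   AMI_ES2011a_H00_FEE041_0003714_0003915 .../temp/lats.ark:12345
#   rev1_AMI_ES2011a_H00_FEE041_0003714_0003915 .../temp2/lats.ark:67890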

lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1;
echo "1" > $lat_dir/num_jobs

# copy other files from original lattice dir
for f in cmvn_opts final.mdl splice_opts tree; do
cp $original_lat_dir/$f $lat_dir/$f
done
fi


if [ $stage -le 14 ]; then
# Build a tree using our new topology. We know we have alignments for the
# speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
# those.
if [ -f $tree_dir/final.mdl ]; then
echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
exit 1;
fi
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
--context-opts "--context-width=2 --central-position=1" \
--leftmost-questions-truncate -1 \
--cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
fi
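
# The --frame-subsampling-factor 3 above matches the 3x lower frame rate at
# which the chain model's output is evaluated.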

if [ $stage -le 15 ]; then
mkdir -p $dir

echo "$0: creating neural net configs";

steps/nnet3/tdnn/make_configs.py \
--self-repair-scale-nonlinearity 0.00001 \
--feat-dir data/$mic/${train_set}_sp_hires_comb \
--ivector-dir $train_ivector_dir \
--tree-dir $tree_dir \
--relu-dim 450 \
--splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \
--use-presoftmax-prior-scale false \
--xent-regularize 0.1 \
--xent-separate-forward-affine true \
--include-log-softmax false \
--final-layer-normalize-target 1.0 \
$dir/configs || exit 1;
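  # Summing the per-layer splice offsets above, the network sees a total
  # temporal context of roughly 17 frames to the left and 12 to the right
  # of each output frame.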
fi

if [ $stage -le 16 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-rvb$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage
fi

mkdir -p $dir/egs
touch $dir/egs/.nodelete # keep egs around in case this run dies.

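# Chain-specific options: --chain.xent-regularize adds a cross-entropy
# regularization output; --chain.leaky-hmm-coefficient smooths the
# denominator-graph HMM; --egs.chunk-width 150 cuts the utterances into
# 150-frame chunks for training.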
steps/nnet3/chain/train.py --stage $train_stage \
--cmd "$decode_cmd" \
--feat.online-ivector-dir $train_ivector_dir \
--feat.cmvn-opts "--norm-means=false --norm-vars=false" \
--chain.xent-regularize 0.1 \
--chain.leaky-hmm-coefficient 0.1 \
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
--chain.lm-opts="--num-extra-lm-states=2000" \
--egs.dir "$common_egs_dir" \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width 150 \
--trainer.num-chunk-per-minibatch 128 \
--trainer.frames-per-iter 1500000 \
--trainer.num-epochs 4 \
--trainer.optimization.num-jobs-initial 2 \
--trainer.optimization.num-jobs-final 12 \
--trainer.optimization.initial-effective-lrate 0.001 \
--trainer.optimization.final-effective-lrate 0.0001 \
--trainer.max-param-change 2.0 \
--cleanup.remove-egs true \
--feat-dir $train_data_dir \
--tree-dir $tree_dir \
--lat-dir $lat_dir \
--dir $dir
fi


graph_dir=$dir/graph_${LM}
if [ $stage -le 17 ]; then
# Note: it might appear that this data/lang_${LM} directory is mismatched with
# the model as far as the 'topo' is concerned, but this script doesn't read
# the 'topo' from the lang directory, so we can use it here.
utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
fi

if [ $stage -le 18 ]; then
rm $dir/.error 2>/dev/null || true
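# Chain models are decoded with --acwt 1.0; --post-decode-acwt 10.0 scales
# the acoustic scores in the output lattices back up by 10 so that the usual
# integer LM-weight range used at scoring time still applies.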
for decode_set in dev eval; do
(
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--nj $nj --cmd "$decode_cmd" \
--online-ivector-dir exp/$mic/nnet3${nnet3_affix}${rvb_affix}/ivectors_${decode_set}_hires \
--scoring-opts "--min-lmwt 5 " \
$graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1;
) || touch $dir/.error &
done
wait
if [ -f $dir/.error ]; then
echo "$0: something went wrong in decoding"
exit 1
fi
fi
exit 0