remove default cfg and fix some bugs,test=asr
Jackwaterveg committed Jan 4, 2022
1 parent a1d8ab0 commit 3e2cc89
Showing 41 changed files with 97 additions and 628 deletions.
47 changes: 0 additions & 47 deletions examples/aishell/asr0/local/test_hub_ori

This file was deleted.

3 changes: 1 addition & 2 deletions examples/other/1xt2x/src_deepspeech2x/bin/test.py
@@ -15,7 +15,6 @@
from src_deepspeech2x.test_model import DeepSpeech2Tester as Tester
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments

@@ -42,7 +41,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
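Every Python entry point touched below follows the same pattern: the hard-coded defaults from get_cfg_defaults() are gone, and the run configuration is built from an empty CfgNode(new_allowed=True) that is filled entirely from the YAML files and command-line overrides. A minimal sketch of that loading pattern (the handling of --decode_cfg, including the 'decode' key, is an assumption for illustration):

from yacs.config import CfgNode

# Illustrative sketch only (not part of the diff): how the entry points now
# assemble their configuration without get_cfg_defaults().
def load_config(config_path, decode_cfg_path=None, opts=None):
    config = CfgNode(new_allowed=True)       # start empty; YAML may introduce any key
    if config_path:
        config.merge_from_file(config_path)  # model/training settings from --config
    if decode_cfg_path:
        decode = CfgNode(new_allowed=True)
        decode.merge_from_file(decode_cfg_path)  # decoding settings from --decode_cfg
        config.decode = decode                   # attached under 'decode' (assumed layout)
    if opts:
        config.merge_from_list(opts)         # --opts key value overrides
    config.freeze()
    return config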
14 changes: 0 additions & 14 deletions examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
@@ -120,20 +120,6 @@ class DeepSpeech2Model(nn.Layer):
:rtype: tuple of LayerOutput
"""

@classmethod
def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
default = CfgNode(
dict(
num_conv_layers=2, #Number of stacking convolution layers.
num_rnn_layers=3, #Number of stacking RNN layers.
rnn_layer_size=1024, #RNN layer size (number of RNN cells).
use_gru=True, #Use gru if set True. Use simple rnn if set False.
share_rnn_weights=True #Whether to share input-hidden weights between forward and backward directional RNNs. Notice that for GRU, weight sharing is not supported.
))
if config is not None:
config.merge_from_other_cfg(default)
return default

def __init__(self,
feat_size,
dict_size,
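With the params() classmethod removed, these model hyperparameters are no longer injected as code defaults and must be supplied by the YAML passed via --config. For reference, the deleted defaults expressed as an explicit config fragment (values copied from the removed block):

from yacs.config import CfgNode

# Reference only: the values DeepSpeech2Model.params() used to provide.
# Equivalent entries now have to appear in the experiment's YAML config.
ds2_model_defaults = CfgNode(dict(
    num_conv_layers=2,       # number of stacked convolution layers
    num_rnn_layers=3,        # number of stacked RNN layers
    rnn_layer_size=1024,     # RNN cells per layer
    use_gru=True,            # GRU if True, simple RNN if False
    share_rnn_weights=True,  # share fwd/bwd input-hidden weights (unsupported for GRU)
))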
37 changes: 0 additions & 37 deletions examples/other/1xt2x/src_deepspeech2x/test_model.py
@@ -44,22 +44,6 @@


class DeepSpeech2Trainer(Trainer):
@classmethod
def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
# training config
default = CfgNode(
dict(
lr=5e-4, # learning rate
lr_decay=1.0, # learning rate decay
weight_decay=1e-6, # the coeff of weight decay
global_grad_clip=5.0, # the global norm clip
n_epoch=50, # train epochs
))

if config is not None:
config.merge_from_other_cfg(default)
return default

def __init__(self, config, args):
super().__init__(config, args)

@@ -246,27 +230,6 @@ def setup_dataloader(self):


class DeepSpeech2Tester(DeepSpeech2Trainer):
@classmethod
def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
# testing config
default = CfgNode(
dict(
alpha=2.5, # Coef of LM for beam search.
beta=0.3, # Coef of WC for beam search.
cutoff_prob=1.0, # Cutoff probability for pruning.
cutoff_top_n=40, # Cutoff number for pruning.
lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm', # Filepath for language model.
decoding_method='ctc_beam_search', # Decoding method. Options: ctc_beam_search, ctc_greedy
error_rate_type='wer', # Error rate type for evaluation. Options `wer`, 'cer'
num_proc_bsearch=8, # # of CPUs for beam search.
beam_size=500, # Beam search width.
batch_size=128, # decoding batch size
))

if config is not None:
config.merge_from_other_cfg(default)
return default

def __init__(self, config, args):

self._text_featurizer = TextFeaturizer(
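The training and decoding defaults deleted from DeepSpeech2Trainer.params() and DeepSpeech2Tester.params() move the same way: they now have to come from the training YAML and from the decode config passed via --decode_cfg. A reference fragment assembled from the removed blocks (the grouping into two nodes is illustrative):

from yacs.config import CfgNode

# Reference only: training defaults formerly supplied by DeepSpeech2Trainer.params().
train_defaults = CfgNode(dict(
    lr=5e-4, lr_decay=1.0, weight_decay=1e-6,
    global_grad_clip=5.0, n_epoch=50,
))

# Reference only: decoding defaults formerly supplied by DeepSpeech2Tester.params().
decode_defaults = CfgNode(dict(
    alpha=2.5, beta=0.3, cutoff_prob=1.0, cutoff_top_n=40,
    lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',
    decoding_method='ctc_beam_search', error_rate_type='wer',
    num_proc_bsearch=8, beam_size=500, batch_size=128,
))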
25 changes: 25 additions & 0 deletions examples/ted_en_zh/st0/conf/preprocess.yaml
@@ -0,0 +1,25 @@
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false
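This new preprocess.yaml is what the renamed preprocess_config key below points at: Kaldi fbank extraction, JSON-based CMVN, and the three SpecAugment steps are declared as one ordered process pipeline. A minimal sketch of inspecting that pipeline with plain PyYAML, independent of the PaddleSpeech transform classes (path relative to the example directory):

import yaml

# Inspect the preprocessing pipeline declared above
# (run from examples/ted_en_zh/st0).
with open("conf/preprocess.yaml") as f:
    preprocess = yaml.safe_load(f)

for stage in preprocess["process"]:
    params = {k: v for k, v in stage.items() if k != "type"}
    print(stage["type"], params)
# Expected order: fbank_kaldi, cmvn_json, time_warp, freq_mask, time_mask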
4 changes: 2 additions & 2 deletions examples/ted_en_zh/st0/conf/transformer.yaml
@@ -19,7 +19,7 @@ vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/bpe_unigram_8000
mean_std_filepath: ""
augmentation_config: conf/preprocess.yaml
preprocess_config: conf/preprocess.yaml
batch_size: 16
maxlen_in: 5 # if input length > maxlen-in, batchsize is automatically reduced
maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced
@@ -87,7 +87,7 @@ global_grad_clip: 5.0
optim: adam
optim_conf:
lr: 2.5
weight_decay: 1e-06
weight_decay: 1.0e-06
scheduler: noam
scheduler_conf:
warmup_steps: 25000
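The weight_decay change from 1e-06 to 1.0e-06 is tied to the https://yaml.org/type/float.html comment that recurs in the Python entry points: a YAML 1.1 loader only resolves a scalar as a float when the mantissa contains a dot, so 1e-06 is read back as a string. A quick check, assuming PyYAML is the loader in use:

import yaml

# YAML 1.1 float resolution: without a '.' in the mantissa the scalar stays a string.
print(type(yaml.safe_load("weight_decay: 1e-06")["weight_decay"]))    # <class 'str'>
print(type(yaml.safe_load("weight_decay: 1.0e-06")["weight_decay"]))  # <class 'float'>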
2 changes: 1 addition & 1 deletion examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
@@ -19,7 +19,7 @@ vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/bpe_unigram_8000
mean_std_filepath: ""
augmentation_config: conf/preprocess.yaml
preprocess_config: conf/preprocess.yaml
batch_size: 16
maxlen_in: 5 # if input length > maxlen-in, batchsize is automatically reduced
maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced
16 changes: 16 additions & 0 deletions examples/ted_en_zh/st1/conf/preprocess.yaml
@@ -0,0 +1,16 @@
process:
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false
4 changes: 2 additions & 2 deletions examples/ted_en_zh/st1/conf/transformer.yaml
@@ -13,7 +13,7 @@ vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
mean_std_filepath: ""
# augmentation_config: conf/augmentation.json
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
@@ -27,7 +27,7 @@ batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
augmentation_config:
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1
4 changes: 2 additions & 2 deletions examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
@@ -13,7 +13,7 @@ vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
mean_std_filepath: ""
# augmentation_config: conf/augmentation.json
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
@@ -27,7 +27,7 @@ batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
augmentation_config:
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1
5 changes: 0 additions & 5 deletions examples/ted_en_zh/st1/local/test.sh
@@ -20,12 +20,7 @@ for type in fullsentence; do
--decode_cfg ${decode_config_path} \
--result_file ${ckpt_prefix}.${type}.rsl \
--checkpoint_path ${ckpt_prefix} \
<<<<<<< HEAD
--opts decode.decoding_method ${type} \
--opts decode.decode_batch_size ${batch_size}
=======
--opts decoding.decoding_method ${type} \
>>>>>>> 6272496d9c26736750b577fd832ea9dd4ddc4e6e

if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
3 changes: 1 addition & 2 deletions examples/tiny/asr1/conf/chunk_confermer.yaml
@@ -58,7 +58,6 @@ mean_std_filepath: ""
vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_200'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
@@ -72,7 +71,7 @@ batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
augmentation_config: conf/preprocess.yaml
preprocess_config: conf/preprocess.yaml
num_workers: 0
subsampling_factor: 1
num_encs: 1
2 changes: 1 addition & 1 deletion paddlespeech/s2t/decoders/recog.py
@@ -85,7 +85,7 @@ def recog_v2(args):
mode="asr",
load_output=False,
sort_in_input_length=False,
preprocess_conf=confs.collator.augmentation_config
preprocess_conf=confs.preprocess_config
if args.preprocess_conf is None else args.preprocess_conf,
preprocess_args={"train": False}, )

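Besides the rename, the lookup moves up a level: the old code read the nested confs.collator.augmentation_config, while the new preprocess_config is a top-level key on the merged config. A small sketch of the difference, assuming the experiment YAML has been merged into a CfgNode as in the entry points above (the path is illustrative):

from yacs.config import CfgNode

confs = CfgNode(new_allowed=True)
confs.merge_from_file("conf/transformer.yaml")  # illustrative path

# old access (nested under the collator section):
#   preprocess_conf = confs.collator.augmentation_config
# new access (flat key used by this commit):
preprocess_conf = confs.preprocess_config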
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py
@@ -20,7 +20,7 @@
from paddle.inference import create_predictor
from paddle.io import DataLoader

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
@@ -176,7 +176,7 @@ def main(config, args):
print_arguments(args, globals())

# https://yaml.org/type/float.html
config = get_cfg_defaults()
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py
@@ -18,7 +18,7 @@
import paddle
from paddle.io import DataLoader

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
@@ -111,7 +111,7 @@ def main(config, args):
print_arguments(args, globals())

# https://yaml.org/type/float.html
config = get_cfg_defaults()
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/export.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Export for DeepSpeech2 model."""
from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -41,7 +41,7 @@ def main(config, args):
print_arguments(args)

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.opts:
3 changes: 1 addition & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/test.py
@@ -14,7 +14,6 @@
"""Evaluation for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -43,7 +42,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 1 addition & 3 deletions paddlespeech/s2t/exps/deepspeech2/bin/test_export.py
@@ -13,8 +13,6 @@
# limitations under the License.
"""Evaluation for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2ExportTester as ExportTester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -48,7 +46,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
2 changes: 1 addition & 1 deletion paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
@@ -188,7 +188,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/train.py
@@ -14,7 +14,7 @@
"""Trainer for DeepSpeech2 model."""
from paddle import distributed as dist

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Trainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -42,7 +42,7 @@ def main(config, args):
print_arguments(args, globals())

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.opts: