remove default cfg and fix some bugs,test=asr
Jackwaterveg committed Jan 4, 2022
1 parent a1d8ab0 commit 3e2cc89
Showing 41 changed files with 97 additions and 628 deletions.
47 changes: 0 additions & 47 deletions examples/aishell/asr0/local/test_hub_ori

This file was deleted.

3 changes: 1 addition & 2 deletions examples/other/1xt2x/src_deepspeech2x/bin/test.py
@@ -15,7 +15,6 @@
from src_deepspeech2x.test_model import DeepSpeech2Tester as Tester
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments

@@ -42,7 +41,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
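Every Python entry point touched below follows the same pattern: the hard-coded defaults from get_cfg_defaults() are gone, and the run configuration is built from an empty CfgNode(new_allowed=True) that is filled entirely from the YAML files and command-line overrides. A minimal sketch of that loading pattern (the handling of --decode_cfg, including the 'decode' key, is an assumption for illustration):

from yacs.config import CfgNode

# Illustrative sketch only (not part of the diff): how the entry points now
# assemble their configuration without get_cfg_defaults().
def load_config(config_path, decode_cfg_path=None, opts=None):
    config = CfgNode(new_allowed=True)       # start empty; YAML may introduce any key
    if config_path:
        config.merge_from_file(config_path)  # model/training settings from --config
    if decode_cfg_path:
        decode = CfgNode(new_allowed=True)
        decode.merge_from_file(decode_cfg_path)  # decoding settings from --decode_cfg
        config.decode = decode                   # attached under 'decode' (assumed layout)
    if opts:
        config.merge_from_list(opts)         # --opts key value overrides
    config.freeze()
    return config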
14 changes: 0 additions & 14 deletions examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
@@ -120,20 +120,6 @@ class DeepSpeech2Model(nn.Layer):
:rtype: tuple of LayerOutput
"""

@classmethod
def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
default = CfgNode(
dict(
num_conv_layers=2, #Number of stacking convolution layers.
num_rnn_layers=3, #Number of stacking RNN layers.
rnn_layer_size=1024, #RNN layer size (number of RNN cells).
use_gru=True, #Use gru if set True. Use simple rnn if set False.
share_rnn_weights=True #Whether to share input-hidden weights between forward and backward directional RNNs. Notice that for GRU, weight sharing is not supported.
))
if config is not None:
config.merge_from_other_cfg(default)
return default

def __init__(self,
feat_size,
dict_size,
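With the params() classmethod removed, these model hyperparameters are no longer injected as code defaults and must be supplied by the YAML passed via --config. For reference, the deleted defaults expressed as an explicit config fragment (values copied from the removed block):

from yacs.config import CfgNode

# Reference only: the values DeepSpeech2Model.params() used to provide.
# Equivalent entries now have to appear in the experiment's YAML config.
ds2_model_defaults = CfgNode(dict(
    num_conv_layers=2,       # number of stacked convolution layers
    num_rnn_layers=3,        # number of stacked RNN layers
    rnn_layer_size=1024,     # RNN cells per layer
    use_gru=True,            # GRU if True, simple RNN if False
    share_rnn_weights=True,  # share fwd/bwd input-hidden weights (unsupported for GRU)
))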
37 changes: 0 additions & 37 deletions examples/other/1xt2x/src_deepspeech2x/test_model.py
@@ -44,22 +44,6 @@


class DeepSpeech2Trainer(Trainer):
@classmethod
def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
# training config
default = CfgNode(
dict(
lr=5e-4, # learning rate
lr_decay=1.0, # learning rate decay
weight_decay=1e-6, # the coeff of weight decay
global_grad_clip=5.0, # the global norm clip
n_epoch=50, # train epochs
))

if config is not None:
config.merge_from_other_cfg(default)
return default

def __init__(self, config, args):
super().__init__(config, args)

@@ -246,27 +230,6 @@ def setup_dataloader(self):


class DeepSpeech2Tester(DeepSpeech2Trainer):
@classmethod
def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
# testing config
default = CfgNode(
dict(
alpha=2.5, # Coef of LM for beam search.
beta=0.3, # Coef of WC for beam search.
cutoff_prob=1.0, # Cutoff probability for pruning.
cutoff_top_n=40, # Cutoff number for pruning.
lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm', # Filepath for language model.
decoding_method='ctc_beam_search', # Decoding method. Options: ctc_beam_search, ctc_greedy
error_rate_type='wer', # Error rate type for evaluation. Options `wer`, 'cer'
num_proc_bsearch=8, # # of CPUs for beam search.
beam_size=500, # Beam search width.
batch_size=128, # decoding batch size
))

if config is not None:
config.merge_from_other_cfg(default)
return default

def __init__(self, config, args):

self._text_featurizer = TextFeaturizer(
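The training and decoding defaults deleted from DeepSpeech2Trainer.params() and DeepSpeech2Tester.params() move the same way: they now have to come from the training YAML and from the decode config passed via --decode_cfg. A reference fragment assembled from the removed blocks (the grouping into two nodes is illustrative):

from yacs.config import CfgNode

# Reference only: training defaults formerly supplied by DeepSpeech2Trainer.params().
train_defaults = CfgNode(dict(
    lr=5e-4, lr_decay=1.0, weight_decay=1e-6,
    global_grad_clip=5.0, n_epoch=50,
))

# Reference only: decoding defaults formerly supplied by DeepSpeech2Tester.params().
decode_defaults = CfgNode(dict(
    alpha=2.5, beta=0.3, cutoff_prob=1.0, cutoff_top_n=40,
    lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',
    decoding_method='ctc_beam_search', error_rate_type='wer',
    num_proc_bsearch=8, beam_size=500, batch_size=128,
))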
25 changes: 25 additions & 0 deletions examples/ted_en_zh/st0/conf/preprocess.yaml
@@ -0,0 +1,25 @@
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false
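This new preprocess.yaml is what the renamed preprocess_config key below points at: Kaldi fbank extraction, JSON-based CMVN, and the three SpecAugment steps are declared as one ordered process pipeline. A minimal sketch of inspecting that pipeline with plain PyYAML, independent of the PaddleSpeech transform classes (path relative to the example directory):

import yaml

# Inspect the preprocessing pipeline declared above
# (run from examples/ted_en_zh/st0).
with open("conf/preprocess.yaml") as f:
    preprocess = yaml.safe_load(f)

for stage in preprocess["process"]:
    params = {k: v for k, v in stage.items() if k != "type"}
    print(stage["type"], params)
# Expected order: fbank_kaldi, cmvn_json, time_warp, freq_mask, time_mask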
4 changes: 2 additions & 2 deletions examples/ted_en_zh/st0/conf/transformer.yaml
@@ -19,7 +19,7 @@ vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/bpe_unigram_8000
mean_std_filepath: ""
augmentation_config: conf/preprocess.yaml
preprocess_config: conf/preprocess.yaml
batch_size: 16
maxlen_in: 5 # if input length > maxlen-in, batchsize is automatically reduced
maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced
@@ -87,7 +87,7 @@ global_grad_clip: 5.0
optim: adam
optim_conf:
lr: 2.5
weight_decay: 1e-06
weight_decay: 1.0e-06
scheduler: noam
scheduler_conf:
warmup_steps: 25000
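The weight_decay change from 1e-06 to 1.0e-06 is tied to the https://yaml.org/type/float.html comment that recurs in the Python entry points: a YAML 1.1 loader only resolves a scalar as a float when the mantissa contains a dot, so 1e-06 is read back as a string. A quick check, assuming PyYAML is the loader in use:

import yaml

# YAML 1.1 float resolution: without a '.' in the mantissa the scalar stays a string.
print(type(yaml.safe_load("weight_decay: 1e-06")["weight_decay"]))    # <class 'str'>
print(type(yaml.safe_load("weight_decay: 1.0e-06")["weight_decay"]))  # <class 'float'>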
2 changes: 1 addition & 1 deletion examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
@@ -19,7 +19,7 @@ vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/bpe_unigram_8000
mean_std_filepath: ""
augmentation_config: conf/preprocess.yaml
preprocess_config: conf/preprocess.yaml
batch_size: 16
maxlen_in: 5 # if input length > maxlen-in, batchsize is automatically reduced
maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced
16 changes: 16 additions & 0 deletions examples/ted_en_zh/st1/conf/preprocess.yaml
@@ -0,0 +1,16 @@
process:
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false
4 changes: 2 additions & 2 deletions examples/ted_en_zh/st1/conf/transformer.yaml
@@ -13,7 +13,7 @@ vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
mean_std_filepath: ""
# augmentation_config: conf/augmentation.json
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
@@ -27,7 +27,7 @@ batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
augmentation_config:
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1
4 changes: 2 additions & 2 deletions examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
@@ -13,7 +13,7 @@ vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
mean_std_filepath: ""
# augmentation_config: conf/augmentation.json
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
@@ -27,7 +27,7 @@ batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
augmentation_config:
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1
5 changes: 0 additions & 5 deletions examples/ted_en_zh/st1/local/test.sh
@@ -20,12 +20,7 @@ for type in fullsentence; do
--decode_cfg ${decode_config_path} \
--result_file ${ckpt_prefix}.${type}.rsl \
--checkpoint_path ${ckpt_prefix} \
<<<<<<< HEAD
--opts decode.decoding_method ${type} \
--opts decode.decode_batch_size ${batch_size}
=======
--opts decoding.decoding_method ${type} \
>>>>>>> 6272496d9c26736750b577fd832ea9dd4ddc4e6e

if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
3 changes: 1 addition & 2 deletions examples/tiny/asr1/conf/chunk_confermer.yaml
@@ -58,7 +58,6 @@ mean_std_filepath: ""
vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_200'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
@@ -72,7 +71,7 @@ batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
augmentation_config: conf/preprocess.yaml
preprocess_config: conf/preprocess.yaml
num_workers: 0
subsampling_factor: 1
num_encs: 1
2 changes: 1 addition & 1 deletion paddlespeech/s2t/decoders/recog.py
@@ -85,7 +85,7 @@ def recog_v2(args):
mode="asr",
load_output=False,
sort_in_input_length=False,
preprocess_conf=confs.collator.augmentation_config
preprocess_conf=confs.preprocess_config
if args.preprocess_conf is None else args.preprocess_conf,
preprocess_args={"train": False}, )

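Besides the rename, the lookup moves up a level: the old code read the nested confs.collator.augmentation_config, while the new preprocess_config is a top-level key on the merged config. A small sketch of the difference, assuming the experiment YAML has been merged into a CfgNode as in the entry points above (the path is illustrative):

from yacs.config import CfgNode

confs = CfgNode(new_allowed=True)
confs.merge_from_file("conf/transformer.yaml")  # illustrative path

# old access (nested under the collator section):
#   preprocess_conf = confs.collator.augmentation_config
# new access (flat key used by this commit):
preprocess_conf = confs.preprocess_config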
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py
@@ -20,7 +20,7 @@
from paddle.inference import create_predictor
from paddle.io import DataLoader

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
@@ -176,7 +176,7 @@ def main(config, args):
print_arguments(args, globals())

# https://yaml.org/type/float.html
config = get_cfg_defaults()
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py
@@ -18,7 +18,7 @@
import paddle
from paddle.io import DataLoader

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
@@ -111,7 +111,7 @@ def main(config, args):
print_arguments(args, globals())

# https://yaml.org/type/float.html
config = get_cfg_defaults()
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/export.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Export for DeepSpeech2 model."""
from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -41,7 +41,7 @@ def main(config, args):
print_arguments(args)

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.opts:
3 changes: 1 addition & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/test.py
@@ -14,7 +14,6 @@
"""Evaluation for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -43,7 +42,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 1 addition & 3 deletions paddlespeech/s2t/exps/deepspeech2/bin/test_export.py
@@ -13,8 +13,6 @@
# limitations under the License.
"""Evaluation for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2ExportTester as ExportTester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -48,7 +46,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
2 changes: 1 addition & 1 deletion paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
@@ -188,7 +188,7 @@ def main(config, args):
print("model_type:{}".format(args.model_type))

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.decode_cfg:
4 changes: 2 additions & 2 deletions paddlespeech/s2t/exps/deepspeech2/bin/train.py
@@ -14,7 +14,7 @@
"""Trainer for DeepSpeech2 model."""
from paddle import distributed as dist

from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
from yacs.config import CfgNode
from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Trainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.utility import print_arguments
@@ -42,7 +42,7 @@ def main(config, args):
print_arguments(args, globals())

# https://yaml.org/type/float.html
config = get_cfg_defaults(args.model_type)
config = CfgNode(new_allowed=True)
if args.config:
config.merge_from_file(args.config)
if args.opts: