From 8c7859d3bc10f9f01eac4c1ee9099fb0b5e2c50f Mon Sep 17 00:00:00 2001 From: Shuangchi He <34329208+Yulv-git@users.noreply.github.com> Date: Fri, 21 Apr 2023 13:06:20 +0800 Subject: [PATCH] Fix some typos. (#3178) Signed-off-by: Yulv-git --- .github/CONTRIBUTING.md | 2 +- audio/paddleaudio/backends/soundfile_backend.py | 2 +- demos/TTSAndroid/README.md | 2 +- demos/TTSArmLinux/front.conf | 4 ++-- demos/TTSCppFrontend/front_demo/front.conf | 4 ++-- demos/TTSCppFrontend/front_demo/front_demo.cpp | 2 +- .../front_demo/gentools/word2phones.py | 6 +++--- .../TTSCppFrontend/src/front/front_interface.cpp | 8 ++++---- demos/TTSCppFrontend/src/front/front_interface.h | 2 +- demos/speech_web/README.md | 2 +- demos/speech_web/speech_server/main.py | 2 +- docs/tutorial/st/st_tutorial.ipynb | 2 +- docs/tutorial/tts/tts_tutorial.ipynb | 2 +- examples/librispeech/asr2/README.md | 2 +- examples/other/mfa/local/generate_lexicon.py | 2 +- examples/tiny/asr1/README.md | 2 +- paddlespeech/s2t/__init__.py | 2 +- .../s2t/frontend/augmentor/augmentation.py | 2 +- paddlespeech/s2t/io/speechbrain/sampler.py | 2 +- paddlespeech/s2t/models/u2/u2.py | 4 ++-- paddlespeech/s2t/models/u2_st/u2_st.py | 2 +- .../engine/asr/online/python/asr_engine.py | 2 +- paddlespeech/server/ws/asr_api.py | 2 +- paddlespeech/t2s/frontend/generate_lexicon.py | 2 +- paddlespeech/t2s/models/waveflow.py | 8 ++++---- .../t2s/modules/transformer/lightconv.py | 2 +- paddlespeech/vector/exps/ecapa_tdnn/train.py | 4 ++-- paddlespeech/vector/exps/ge2e/preprocess.py | 2 +- .../ds2_ol/onnx/local/onnx_infer_shape.py | 2 +- speechx/speechx/frontend/audio/db_norm.cc | 2 +- speechx/speechx/kaldi/base/kaldi-types.h | 2 +- speechx/speechx/kaldi/feat/pitch-functions.cc | 2 +- speechx/speechx/kaldi/lat/lattice-functions.h | 16 ++++++++-------- speechx/speechx/kaldi/matrix/kaldi-matrix.cc | 2 +- speechx/speechx/kaldi/matrix/sparse-matrix.cc | 2 +- speechx/speechx/kaldi/util/kaldi-table-inl.h | 2 +- speechx/speechx/nnet/ds2_nnet.cc | 2 +- .../protocol/websocket/websocket_server.cc | 8 ++++---- tools/extras/install_mkl.sh | 2 +- utils/fst/ctc_token_fst.py | 2 +- utils/tokenizer.perl | 2 +- 41 files changed, 63 insertions(+), 63 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a18c454c77f..1ff473308f7 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,4 +27,4 @@ git commit -m "xxxxxx, test=doc" 1. 虽然跳过了 CI,但是还要先排队排到才能跳过,所以非自己方向看到 pending 不要着急 🤣 2. 在 `git commit --amend` 的时候才加 `test=xxx` 可能不太有效 3. 一个 pr 多次提交 commit 注意每次都要加 `test=xxx`,因为每个 commit 都会触发 CI -4. 删除 python 环境中已经安装好的的 paddlespeech,否则可能会影响 import paddlespeech 的顺序 +4. 
删除 python 环境中已经安装好的 paddlespeech,否则可能会影响 import paddlespeech 的顺序 diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/audio/paddleaudio/backends/soundfile_backend.py index ae7b5b52d49..9195ea0974a 100644 --- a/audio/paddleaudio/backends/soundfile_backend.py +++ b/audio/paddleaudio/backends/soundfile_backend.py @@ -191,7 +191,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: if sr <= 0: raise ParameterError( - f'Sample rate should be larger than 0, recieved sr = {sr}') + f'Sample rate should be larger than 0, received sr = {sr}') if y.dtype not in ['int16', 'int8']: warnings.warn( diff --git a/demos/TTSAndroid/README.md b/demos/TTSAndroid/README.md index 36ff969fb03..36848cbe370 100644 --- a/demos/TTSAndroid/README.md +++ b/demos/TTSAndroid/README.md @@ -1,6 +1,6 @@ # 语音合成 Java API Demo 使用指南 -在 Android 上实现语音合成功能,此 Demo 有很好的的易用性和开放性,如在 Demo 中跑自己训练好的模型等。 +在 Android 上实现语音合成功能,此 Demo 有很好的易用性和开放性,如在 Demo 中跑自己训练好的模型等。 本文主要介绍语音合成 Demo 运行方法。 diff --git a/demos/TTSArmLinux/front.conf b/demos/TTSArmLinux/front.conf index 04bd2d97f05..5960b32a92d 100644 --- a/demos/TTSArmLinux/front.conf +++ b/demos/TTSArmLinux/front.conf @@ -6,13 +6,13 @@ --jieba_stop_word_path=./dict/jieba/stop_words.utf8 # dict conf fastspeech2_0.4 ---seperate_tone=false +--separate_tone=false --word2phone_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict --phone2id_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt --tone2id_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict # dict conf speedyspeech_0.5 -#--seperate_tone=true +#--separate_tone=true #--word2phone_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/word2phone.dict #--phone2id_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt #--tone2id_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt diff --git a/demos/TTSCppFrontend/front_demo/front.conf b/demos/TTSCppFrontend/front_demo/front.conf index e9ce1c94d73..abff444703c 100644 --- a/demos/TTSCppFrontend/front_demo/front.conf +++ b/demos/TTSCppFrontend/front_demo/front.conf @@ -6,13 +6,13 @@ --jieba_stop_word_path=./front_demo/dict/jieba/stop_words.utf8 # dict conf fastspeech2_0.4 ---seperate_tone=false +--separate_tone=false --word2phone_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict --phone2id_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt --tone2id_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict # dict conf speedyspeech_0.5 -#--seperate_tone=true +#--separate_tone=true #--word2phone_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/word2phone.dict #--phone2id_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt #--tone2id_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt diff --git a/demos/TTSCppFrontend/front_demo/front_demo.cpp b/demos/TTSCppFrontend/front_demo/front_demo.cpp index 19f16758bc6..77f3fc725d0 100644 --- a/demos/TTSCppFrontend/front_demo/front_demo.cpp +++ b/demos/TTSCppFrontend/front_demo/front_demo.cpp @@ -20,7 +20,7 @@ DEFINE_string(sentence, "你好,欢迎使用语音合成服务", "Text to be synthesized"); DEFINE_string(front_conf, "./front_demo/front.conf", "Front conf file"); -// DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid"); +// DEFINE_string(separate_tone, "true", "If true, get phoneids and tonesid"); int main(int argc, char** argv) { diff --git a/demos/TTSCppFrontend/front_demo/gentools/word2phones.py b/demos/TTSCppFrontend/front_demo/gentools/word2phones.py index 
8726ee89cf6..d9baeea9c09 100644 --- a/demos/TTSCppFrontend/front_demo/gentools/word2phones.py +++ b/demos/TTSCppFrontend/front_demo/gentools/word2phones.py @@ -20,7 +20,7 @@ newdict = "./dict/word_phones.dict" -def GenPhones(initials, finals, seperate=True): +def GenPhones(initials, finals, separate=True): phones = [] for c, v in zip(initials, finals): @@ -30,9 +30,9 @@ def GenPhones(initials, finals, seperate=True): elif c in ['zh', 'ch', 'sh', 'r']: v = re.sub('i', 'iii', v) if c: - if seperate is True: + if separate is True: phones.append(c + '0') - elif seperate is False: + elif separate is False: phones.append(c) else: print("Not sure whether phone and tone need to be separated") diff --git a/demos/TTSCppFrontend/src/front/front_interface.cpp b/demos/TTSCppFrontend/src/front/front_interface.cpp index 8bd466d28e9..e7b08c798f8 100644 --- a/demos/TTSCppFrontend/src/front/front_interface.cpp +++ b/demos/TTSCppFrontend/src/front/front_interface.cpp @@ -126,7 +126,7 @@ int FrontEngineInterface::init() { } // 生成音调字典(音调到音调id的映射) - if (_seperate_tone == "true") { + if (_separate_tone == "true") { if (0 != GenDict(_tone2id_path, &tone_id_map)) { LOG(ERROR) << "Genarate tone2id dict failed"; return -1; @@ -168,7 +168,7 @@ int FrontEngineInterface::ReadConfFile() { _jieba_stop_word_path = conf_map["jieba_stop_word_path"]; // dict path - _seperate_tone = conf_map["seperate_tone"]; + _separate_tone = conf_map["separate_tone"]; _word2phone_path = conf_map["word2phone_path"]; _phone2id_path = conf_map["phone2id_path"]; _tone2id_path = conf_map["tone2id_path"]; @@ -295,7 +295,7 @@ int FrontEngineInterface::GetWordsIds( } } } else { // 标点符号 - if (_seperate_tone == "true") { + if (_separate_tone == "true") { phone = "sp0"; // speedyspeech } else { phone = "sp"; // fastspeech2 @@ -354,7 +354,7 @@ int FrontEngineInterface::Phone2Phoneid(const std::string &phone, std::string temp_phone; for (int i = 0; i < phone_vec.size(); i++) { temp_phone = phone_vec[i]; - if (_seperate_tone == "true") { + if (_separate_tone == "true") { phoneid->push_back(atoi( (phone_id_map[temp_phone.substr(0, temp_phone.length() - 1)]) .c_str())); diff --git a/demos/TTSCppFrontend/src/front/front_interface.h b/demos/TTSCppFrontend/src/front/front_interface.h index fc33a4de6bc..8c16859cf46 100644 --- a/demos/TTSCppFrontend/src/front/front_interface.h +++ b/demos/TTSCppFrontend/src/front/front_interface.h @@ -182,7 +182,7 @@ class FrontEngineInterface : public TextNormalizer { std::string _jieba_idf_path; std::string _jieba_stop_word_path; - std::string _seperate_tone; + std::string _separate_tone; std::string _word2phone_path; std::string _phone2id_path; std::string _tone2id_path; diff --git a/demos/speech_web/README.md b/demos/speech_web/README.md index 572781ab682..fc1fe7105f4 100644 --- a/demos/speech_web/README.md +++ b/demos/speech_web/README.md @@ -23,7 +23,7 @@ Paddle Speech Demo 是一个以 PaddleSpeech 的语音交互功能为主体开 + ERNIE-SAT:语言-语音跨模态大模型 ERNIE-SAT 可视化展示示例,支持个性化合成,跨语言语音合成(音频为中文则输入英文文本进行合成),语音编辑(修改音频文字中间的结果)功能。 ERNIE-SAT 更多实现细节,可以参考: + [【ERNIE-SAT with AISHELL-3 dataset】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/ernie_sat) - + [【ERNIE-SAT with with AISHELL3 and VCTK datasets】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3_vctk/ernie_sat) + + [【ERNIE-SAT with AISHELL3 and VCTK datasets】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3_vctk/ernie_sat) + [【ERNIE-SAT with VCTK 
dataset】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/ernie_sat) 运行效果: diff --git a/demos/speech_web/speech_server/main.py b/demos/speech_web/speech_server/main.py index 03e7e5996c2..f4678628f15 100644 --- a/demos/speech_web/speech_server/main.py +++ b/demos/speech_web/speech_server/main.py @@ -260,7 +260,7 @@ async def websocket_endpoint_online(websocket: WebSocket): # and we break the loop if message['signal'] == 'start': resp = {"status": "ok", "signal": "server_ready"} - # do something at begining here + # do something at beginning here # create the instance to process the audio # connection_handler = chatbot.asr.connection_handler connection_handler = PaddleASRConnectionHanddler(engine) diff --git a/docs/tutorial/st/st_tutorial.ipynb b/docs/tutorial/st/st_tutorial.ipynb index 2fb850535ed..e755bebad17 100644 --- a/docs/tutorial/st/st_tutorial.ipynb +++ b/docs/tutorial/st/st_tutorial.ipynb @@ -62,7 +62,7 @@ "collapsed": false }, "source": [ - "# 使用Transformer进行端到端语音翻译的的基本流程\n", + "# 使用Transformer进行端到端语音翻译的基本流程\n", "## 基础模型\n", "由于 ASR 章节已经介绍了 Transformer 以及语音特征抽取,在此便不做过多介绍,感兴趣的同学可以去相关章节进行了解。\n", "\n", diff --git a/docs/tutorial/tts/tts_tutorial.ipynb b/docs/tutorial/tts/tts_tutorial.ipynb index 583adb01470..0cecb680d61 100644 --- a/docs/tutorial/tts/tts_tutorial.ipynb +++ b/docs/tutorial/tts/tts_tutorial.ipynb @@ -464,7 +464,7 @@ "
FastSpeech2 网络结构图
\n", "\n", "\n", - "PaddleSpeech TTS 实现的 FastSpeech2 与论文不同的地方在于,我们使用的的是 phone 级别的 `pitch` 和 `energy`(与 FastPitch 类似),这样的合成结果可以更加**稳定**。\n", + "PaddleSpeech TTS 实现的 FastSpeech2 与论文不同的地方在于,我们使用的是 phone 级别的 `pitch` 和 `energy`(与 FastPitch 类似),这样的合成结果可以更加**稳定**。\n", "
\n", "
FastPitch 网络结构图

\n", "\n", diff --git a/examples/librispeech/asr2/README.md b/examples/librispeech/asr2/README.md index 26978520da2..253c9b45950 100644 --- a/examples/librispeech/asr2/README.md +++ b/examples/librispeech/asr2/README.md @@ -153,7 +153,7 @@ After training the model, we need to get the final model for testing and inferen ```bash if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then # avg n best model - avg.sh lastest exp/${ckpt}/checkpoints ${avg_num} + avg.sh latest exp/${ckpt}/checkpoints ${avg_num} fi ``` The `avg.sh` is in the `../../../utils/` which is define in the `path.sh`. diff --git a/examples/other/mfa/local/generate_lexicon.py b/examples/other/mfa/local/generate_lexicon.py index 3deb2470189..e63b5eb27d0 100644 --- a/examples/other/mfa/local/generate_lexicon.py +++ b/examples/other/mfa/local/generate_lexicon.py @@ -48,7 +48,7 @@ def rule(C, V, R, T): 'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'. - Erhua is is possibly applied to every finals, except for finals that already ends with 'r'. + Erhua is possibly applied to every finals, except for finals that already ends with 'r'. When a syllable is impossible or does not have any characters with this pronunciation, return None to filter it out. diff --git a/examples/tiny/asr1/README.md b/examples/tiny/asr1/README.md index cfa26670451..489f5bc3e76 100644 --- a/examples/tiny/asr1/README.md +++ b/examples/tiny/asr1/README.md @@ -37,7 +37,7 @@ It will support the way of using `--variable value` in the shell scripts. Some local variables are set in `run.sh`. `gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. `stage` denotes the number of stage you want the start from in the experiments. -`stop stage` denotes the number of stage you want the stop at in the expriments. +`stop stage` denotes the number of stage you want the stop at in the experiments. `conf_path` denotes the config path of the model. `avg_num`denotes the number K of top-K models you want to average to get the final model. `ckpt` denotes the checkpoint prefix of the model, e.g. "transformerr" diff --git a/paddlespeech/s2t/__init__.py b/paddlespeech/s2t/__init__.py index 6663bcf87be..37d99226204 100644 --- a/paddlespeech/s2t/__init__.py +++ b/paddlespeech/s2t/__init__.py @@ -267,7 +267,7 @@ def to(x: paddle.Tensor, *args, **kwargs) -> paddle.Tensor: if not hasattr(paddle.Tensor, 'to'): - logger.debug("register user to to paddle.Tensor, remove this when fixed!") + logger.debug("register user to paddle.Tensor, remove this when fixed!") setattr(paddle.Tensor, 'to', to) setattr(paddle.static.Variable, 'to', to) diff --git a/paddlespeech/s2t/frontend/augmentor/augmentation.py b/paddlespeech/s2t/frontend/augmentor/augmentation.py index 4c5ca4fe630..744ea56dd79 100644 --- a/paddlespeech/s2t/frontend/augmentor/augmentation.py +++ b/paddlespeech/s2t/frontend/augmentor/augmentation.py @@ -45,7 +45,7 @@ class AugmentationPipeline(): samples to make the model invariant to certain types of perturbations in the real world, improving model's generalization ability. - The pipeline is built according the the augmentation configuration in json + The pipeline is built according to the augmentation configuration in json string, e.g. .. 
code-block:: diff --git a/paddlespeech/s2t/io/speechbrain/sampler.py b/paddlespeech/s2t/io/speechbrain/sampler.py index ba13193eb6e..09a884c2b84 100755 --- a/paddlespeech/s2t/io/speechbrain/sampler.py +++ b/paddlespeech/s2t/io/speechbrain/sampler.py @@ -283,7 +283,7 @@ def _get_boundaries_through_warping( num_quantiles, ) # get quantiles using lognormal distribution quantiles = lognorm.ppf(latent_boundaries, 1) - # scale up to to max_batch_length + # scale up to max_batch_length bucket_boundaries = quantiles * max_batch_length / quantiles[-1] # compute resulting bucket length multipliers length_multipliers = [ diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 6494b5304c4..f716fa3b57f 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -560,7 +560,7 @@ def attention_rescoring(self, [len(hyp[0]) for hyp in hyps], place=device, dtype=paddle.long) # (beam_size,) hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining + hyps_lens = hyps_lens + 1 # Add at beginning logger.debug( f"hyps pad: {hyps_pad} {self.sos} {self.eos} {self.ignore_id}") @@ -709,7 +709,7 @@ def forward_attention_decoder(self, hypothesis from ctc prefix beam search and one encoder output Args: hyps (paddle.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining, (B, T) + pad sos at the beginning, (B, T) hyps_lens (paddle.Tensor): length of each hyp in hyps, (B) encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D) Returns: diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index 31defbbaf1b..b4c8c255f01 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -455,7 +455,7 @@ def forward_attention_decoder( hypothesis from ctc prefix beam search and one encoder output Args: hyps (paddle.Tensor): hyps from ctc prefix beam search, already - pad sos at the begining, (B, T) + pad sos at the beginning, (B, T) hyps_lens (paddle.Tensor): length of each hyp in hyps, (B) encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D) Returns: diff --git a/paddlespeech/server/engine/asr/online/python/asr_engine.py b/paddlespeech/server/engine/asr/online/python/asr_engine.py index 536ffe0a906..a702f0aa12d 100644 --- a/paddlespeech/server/engine/asr/online/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/online/python/asr_engine.py @@ -609,7 +609,7 @@ def rescoring(self): dtype=paddle.long) # (beam_size,) hyps_pad, _ = add_sos_eos(hyps_pad, self.model.sos, self.model.eos, self.model.ignore_id) - hyps_lens = hyps_lens + 1 # Add at begining + hyps_lens = hyps_lens + 1 # Add at beginning # ctc score in ln domain # (beam_size, max_hyps_len, vocab_size) diff --git a/paddlespeech/server/ws/asr_api.py b/paddlespeech/server/ws/asr_api.py index ae1c8831077..b3ad0b7c502 100644 --- a/paddlespeech/server/ws/asr_api.py +++ b/paddlespeech/server/ws/asr_api.py @@ -67,7 +67,7 @@ async def websocket_endpoint(websocket: WebSocket): # and we break the loop if message['signal'] == 'start': resp = {"status": "ok", "signal": "server_ready"} - # do something at begining here + # do something at beginning here # create the instance to process the audio #connection_handler = PaddleASRConnectionHanddler(asr_model) connection_handler = asr_model.new_handler() diff --git a/paddlespeech/t2s/frontend/generate_lexicon.py b/paddlespeech/t2s/frontend/generate_lexicon.py index 6b467d00e12..4fb748a69bb 
100644 --- a/paddlespeech/t2s/frontend/generate_lexicon.py +++ b/paddlespeech/t2s/frontend/generate_lexicon.py @@ -45,7 +45,7 @@ def rule(C, V, R, T): 'u' in syllables when certain conditions are satisfied. 'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'. - Erhua is is possibly applied to every finals, except for finals that already ends with 'r'. + Erhua is possibly applied to every finals, except for finals that already ends with 'r'. When a syllable is impossible or does not have any characters with this pronunciation, return None to filter it out. """ diff --git a/paddlespeech/t2s/models/waveflow.py b/paddlespeech/t2s/models/waveflow.py index 8e2ce822fd2..b4818cab4cd 100644 --- a/paddlespeech/t2s/models/waveflow.py +++ b/paddlespeech/t2s/models/waveflow.py @@ -236,7 +236,7 @@ def add_input(self, x_row, condition_row): Returns: res (Tensor): - A row of the the residual output. shape=(batch_size, channel, 1, width) + A row of the residual output. shape=(batch_size, channel, 1, width) skip (Tensor): A row of the skip output. shape=(batch_size, channel, 1, width) @@ -343,7 +343,7 @@ def add_input(self, x_row, condition_row): Returns: res (Tensor): - A row of the the residual output. shape=(batch_size, channel, 1, width) + A row of the residual output. shape=(batch_size, channel, 1, width) skip (Tensor): A row of the skip output. shape=(batch_size, channel, 1, width) @@ -465,7 +465,7 @@ def _start_sequence(self): self.resnet.start_sequence() def inverse(self, z, condition): - """Sampling from the the distrition p(X). It is done by sample form + """Sampling from the distrition p(X). It is done by sample form p(Z) and transform the sample. It is a auto regressive transformation. Args: @@ -600,7 +600,7 @@ def forward(self, x, condition): return z, log_det_jacobian def inverse(self, z, condition): - """Sampling from the the distrition p(X). + """Sampling from the distrition p(X). It is done by sample a ``z`` form p(Z) and transform it into ``x``. Each Flow transform .. math:: `z_{i-1}` to .. math:: `z_{i}` in an diff --git a/paddlespeech/t2s/modules/transformer/lightconv.py b/paddlespeech/t2s/modules/transformer/lightconv.py index 22217d50f51..85336f4f3ca 100644 --- a/paddlespeech/t2s/modules/transformer/lightconv.py +++ b/paddlespeech/t2s/modules/transformer/lightconv.py @@ -110,7 +110,7 @@ def forward(self, query, key, value, mask): (batch, time1, time2) mask Return: - Tensor: ouput. (batch, time1, d_model) + Tensor: output. 
(batch, time1, d_model) """ # linear -> GLU -> lightconv -> linear diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index bf014045d0a..2dc7a7164c5 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -51,7 +51,7 @@ def main(args, config): # stage0: set the training device, cpu or gpu paddle.set_device(args.device) - # stage1: we must call the paddle.distributed.init_parallel_env() api at the begining + # stage1: we must call the paddle.distributed.init_parallel_env() api at the beginning paddle.distributed.init_parallel_env() nranks = paddle.distributed.get_world_size() rank = paddle.distributed.get_rank() @@ -146,7 +146,7 @@ def main(args, config): timer.start() for epoch in range(start_epoch + 1, config.epochs + 1): - # at the begining, model must set to train mode + # at the beginning, model must set to train mode model.train() avg_loss = 0 diff --git a/paddlespeech/vector/exps/ge2e/preprocess.py b/paddlespeech/vector/exps/ge2e/preprocess.py index dabe0ce7694..ee59e62457a 100644 --- a/paddlespeech/vector/exps/ge2e/preprocess.py +++ b/paddlespeech/vector/exps/ge2e/preprocess.py @@ -42,7 +42,7 @@ parser.add_argument( "--skip_existing", action="store_true", - help="Whether to skip ouput files with the same name. Useful if this script was interrupted." + help="Whether to skip output files with the same name. Useful if this script was interrupted." ) parser.add_argument( "--no_trim", diff --git a/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py b/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py index c53e9ec920d..65709fc2be1 100755 --- a/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py +++ b/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py @@ -2078,7 +2078,7 @@ def _infer_PythonOp(self, node): output_tensor_ranks = get_attribute(node, 'output_tensor_ranks') assert output_tensor_ranks - # set the context output seperately. + # set the context output separately. # The first output is autograd's context. 
vi = self.known_vi_[node.output[0]] vi.CopyFrom( diff --git a/speechx/speechx/frontend/audio/db_norm.cc b/speechx/speechx/frontend/audio/db_norm.cc index ad79fcc3a4a..7141fc8077a 100644 --- a/speechx/speechx/frontend/audio/db_norm.cc +++ b/speechx/speechx/frontend/audio/db_norm.cc @@ -76,7 +76,7 @@ bool DecibelNormalizer::Compute(VectorBase* waves) const { if (gain > opts_.max_gain_db) { LOG(ERROR) << "Unable to normalize segment to " << opts_.target_db << "dB," - << "because the the probable gain have exceeds opts_.max_gain_db" + << "because the probable gain has exceeded opts_.max_gain_db" << opts_.max_gain_db << "dB."; return false; } diff --git a/speechx/speechx/kaldi/base/kaldi-types.h b/speechx/speechx/kaldi/base/kaldi-types.h index c6a3e1aedb9..07381cf2af8 100644 --- a/speechx/speechx/kaldi/base/kaldi-types.h +++ b/speechx/speechx/kaldi/base/kaldi-types.h @@ -40,7 +40,7 @@ typedef float BaseFloat; #include // for discussion on what to do if you need compile kaldi -// without OpenFST, see the bottom of this this file +// without OpenFST, see the bottom of this file #ifndef COMPILE_WITHOUT_OPENFST diff --git a/speechx/speechx/kaldi/feat/pitch-functions.cc b/speechx/speechx/kaldi/feat/pitch-functions.cc index 430e9bdb53a..d71169ec916 100644 --- a/speechx/speechx/kaldi/feat/pitch-functions.cc +++ b/speechx/speechx/kaldi/feat/pitch-functions.cc @@ -746,7 +746,7 @@ OnlinePitchFeatureImpl::OnlinePitchFeatureImpl( Vector lags_offset(lags_); // lags_offset equals lags_ (which are the log-spaced lag values we want to // measure the NCCF at) with nccf_first_lag_ / opts.resample_freq subtracted - // from each element, so we can treat the measured NCCF values as as starting + // from each element, so we can treat the measured NCCF values as starting // from sample zero in a signal that starts at the point start / // opts.resample_freq. This is necessary because the ArbitraryResample code // assumes that the input signal starts from sample zero. diff --git a/speechx/speechx/kaldi/lat/lattice-functions.h b/speechx/speechx/kaldi/lat/lattice-functions.h index 6b1b6656c27..785d3f96ec9 100644 --- a/speechx/speechx/kaldi/lat/lattice-functions.h +++ b/speechx/speechx/kaldi/lat/lattice-functions.h @@ -355,12 +355,12 @@ bool PruneLattice(BaseFloat beam, LatticeType *lat); // // // /// This function returns the number of words in the longest sentence in a -// /// CompactLattice (i.e. the the maximum of any path, of the count of +// /// CompactLattice (i.e. the maximum of any path, of the count of // /// olabels on that path). // int32 LongestSentenceLength(const Lattice &lat); // // /// This function returns the number of words in the longest sentence in a -// /// CompactLattice, i.e. the the maximum of any path, of the count of +// /// CompactLattice, i.e. the maximum of any path, of the count of // /// labels on that path... note, in CompactLattice, the ilabels and olabels // /// are identical because it is an acceptor. // int32 LongestSentenceLength(const CompactLattice &lat); @@ -408,7 +408,7 @@ bool PruneLattice(BaseFloat beam, LatticeType *lat); // // /// This function computes the mapping from the pair // /// (frame-index, transition-id) to the pair -// /// (sum-of-acoustic-scores, num-of-occurences) over all occurences of the +// /// (sum-of-acoustic-scores, num-of-occurrences) over all occurrences of the // /// transition-id in that frame. // /// frame-index in the lattice. 
// /// This function is useful for retaining the acoustic scores in a @@ -422,13 +422,13 @@ bool PruneLattice(BaseFloat beam, LatticeType *lat); // /// @param [out] acoustic_scores // /// Pointer to a map from the pair (frame-index, // /// transition-id) to a pair (sum-of-acoustic-scores, -// /// num-of-occurences). +// /// num-of-occurrences). // /// Usually the acoustic scores for a pdf-id (and hence // /// transition-id) on a frame will be the same for all the -// /// occurences of the pdf-id in that frame. +// /// occurrences of the pdf-id in that frame. // /// But if not, we will take the average of the acoustic // /// scores. Hence, we store both the sum-of-acoustic-scores -// /// and the num-of-occurences of the transition-id in that +// /// and the num-of-occurrences of the transition-id in that // /// frame. // void ComputeAcousticScoresMap( // const Lattice &lat, @@ -440,8 +440,8 @@ bool PruneLattice(BaseFloat beam, LatticeType *lat); // /// // /// @param [in] acoustic_scores // /// A map from the pair (frame-index, transition-id) to a -// /// pair (sum-of-acoustic-scores, num-of-occurences) of -// /// the occurences of the transition-id in that frame. +// /// pair (sum-of-acoustic-scores, num-of-occurrences) of +// /// the occurrences of the transition-id in that frame. // /// See the comments for ComputeAcousticScoresMap for // /// details. // /// @param [out] lat Pointer to the output lattice. diff --git a/speechx/speechx/kaldi/matrix/kaldi-matrix.cc b/speechx/speechx/kaldi/matrix/kaldi-matrix.cc index faf23cdf0c5..85e6fecc861 100644 --- a/speechx/speechx/kaldi/matrix/kaldi-matrix.cc +++ b/speechx/speechx/kaldi/matrix/kaldi-matrix.cc @@ -1646,7 +1646,7 @@ SubMatrix::SubMatrix(const MatrixBase &M, static_cast(M.num_rows_ - ro) && static_cast(c) <= static_cast(M.num_cols_ - co)); - // point to the begining of window + // point to the beginning of window MatrixBase::num_rows_ = r; MatrixBase::num_cols_ = c; MatrixBase::stride_ = M.Stride(); diff --git a/speechx/speechx/kaldi/matrix/sparse-matrix.cc b/speechx/speechx/kaldi/matrix/sparse-matrix.cc index 68a61e17dc3..192d258457c 100644 --- a/speechx/speechx/kaldi/matrix/sparse-matrix.cc +++ b/speechx/speechx/kaldi/matrix/sparse-matrix.cc @@ -998,7 +998,7 @@ void FilterCompressedMatrixRows(const CompressedMatrix &in, // iterating row-wise versus column-wise in compressed-matrix uncompression. if (num_kept_rows > heuristic * in.NumRows()) { - // if quite a few of the the rows are kept, it may be more efficient + // if quite a few of the rows are kept, it may be more efficient // to uncompress the entire compressed matrix, since per-column operation // is more efficient. Matrix full_mat(in); diff --git a/speechx/speechx/kaldi/util/kaldi-table-inl.h b/speechx/speechx/kaldi/util/kaldi-table-inl.h index 6aca2f137e3..175e27049a0 100644 --- a/speechx/speechx/kaldi/util/kaldi-table-inl.h +++ b/speechx/speechx/kaldi/util/kaldi-table-inl.h @@ -1587,7 +1587,7 @@ template class RandomAccessTableReaderImplBase { // this from a pipe. In principle we could read it on-demand as for the // archives, but this would probably be overkill. -// Note: the code for this this class is similar to TableWriterScriptImpl: +// Note: the code for this class is similar to TableWriterScriptImpl: // try to keep them in sync. 
template class RandomAccessTableReaderScriptImpl: diff --git a/speechx/speechx/nnet/ds2_nnet.cc b/speechx/speechx/nnet/ds2_nnet.cc index 22c7f61b82d..f30d7979cd2 100644 --- a/speechx/speechx/nnet/ds2_nnet.cc +++ b/speechx/speechx/nnet/ds2_nnet.cc @@ -105,7 +105,7 @@ paddle_infer::Predictor* PaddleNnet::GetPredictor() { while (pred_id < pool_usages.size()) { if (pool_usages[pred_id] == false) { - predictor = pool->Retrive(pred_id); + predictor = pool->Retrieve(pred_id); break; } ++pred_id; diff --git a/speechx/speechx/protocol/websocket/websocket_server.cc b/speechx/speechx/protocol/websocket/websocket_server.cc index 14f2f6e9fb4..d1bed1ca11e 100644 --- a/speechx/speechx/protocol/websocket/websocket_server.cc +++ b/speechx/speechx/protocol/websocket/websocket_server.cc @@ -32,14 +32,14 @@ void ConnectionHandler::OnSpeechStart() { decode_thread_ = std::make_shared( &ConnectionHandler::DecodeThreadFunc, this); got_start_tag_ = true; - LOG(INFO) << "Server: Recieved speech start signal, start reading speech"; + LOG(INFO) << "Server: Received speech start signal, start reading speech"; json::value rv = {{"status", "ok"}, {"type", "server_ready"}}; ws_.text(true); ws_.write(asio::buffer(json::serialize(rv))); } void ConnectionHandler::OnSpeechEnd() { - LOG(INFO) << "Server: Recieved speech end signal"; + LOG(INFO) << "Server: Received speech end signal"; if (recognizer_ != nullptr) { recognizer_->SetFinished(); } @@ -70,8 +70,8 @@ void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) { pcm_data(i) = static_cast(*pdata); pdata++; } - VLOG(2) << "Server: Recieved " << num_samples << " samples"; - LOG(INFO) << "Server: Recieved " << num_samples << " samples"; + VLOG(2) << "Server: Received " << num_samples << " samples"; + LOG(INFO) << "Server: Received " << num_samples << " samples"; CHECK(recognizer_ != nullptr); recognizer_->Accept(pcm_data); diff --git a/tools/extras/install_mkl.sh b/tools/extras/install_mkl.sh index 8c1899bdf2f..01bce64fe27 100755 --- a/tools/extras/install_mkl.sh +++ b/tools/extras/install_mkl.sh @@ -166,7 +166,7 @@ variable, sudo might not allow it to propagate to the command that it invokes." fi # The install variants, each in a function to simplify error reporting. -# Each one invokes a subshell with a 'set -x' to to show system-modifying +# Each one invokes a subshell with a 'set -x' to show system-modifying # commands it runs. The subshells simply limit the scope of this diagnostics # and avoid creating noise (if we were using 'set +x', it would be printed). Install_redhat () { diff --git a/utils/fst/ctc_token_fst.py b/utils/fst/ctc_token_fst.py index 2262912c8bf..f63e9cdacb5 100755 --- a/utils/fst/ctc_token_fst.py +++ b/utils/fst/ctc_token_fst.py @@ -6,7 +6,7 @@ def main(args): """Token Transducer""" # entry print('0 1 ') - # skip begining and ending + # skip beginning and ending print('1 1 ') print('2 2 ') # exit diff --git a/utils/tokenizer.perl b/utils/tokenizer.perl index ae97d6582bd..836fe19c612 100644 --- a/utils/tokenizer.perl +++ b/utils/tokenizer.perl @@ -296,7 +296,7 @@ sub tokenize $text =~ s/DOTMULTI\./DOTDOTMULTI/g; } - # seperate out "," except if within numbers (5,300) + # separate out "," except if within numbers (5,300) #$text =~ s/([^\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g; # separate out "," except if within numbers (5,300)