Fix some typos. (#3178)
Signed-off-by: Yulv-git <[email protected]>
Yulv-git committed Apr 21, 2023
1 parent 35d874c commit 8c7859d
Showing 41 changed files with 63 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
@@ -27,4 +27,4 @@ git commit -m "xxxxxx, test=doc"
1. 虽然跳过了 CI,但是还要先排队排到才能跳过,所以非自己方向看到 pending 不要着急 🤣
2.`git commit --amend` 的时候才加 `test=xxx` 可能不太有效
3. 一个 pr 多次提交 commit 注意每次都要加 `test=xxx`,因为每个 commit 都会触发 CI
- 4. 删除 python 环境中已经安装好的的 paddlespeech,否则可能会影响 import paddlespeech 的顺序
+ 4. 删除 python 环境中已经安装好的 paddlespeech,否则可能会影响 import paddlespeech 的顺序
2 changes: 1 addition & 1 deletion audio/paddleaudio/backends/soundfile_backend.py
@@ -191,7 +191,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:

if sr <= 0:
raise ParameterError(
- f'Sample rate should be larger than 0, recieved sr = {sr}')
+ f'Sample rate should be larger than 0, received sr = {sr}')

if y.dtype not in ['int16', 'int8']:
warnings.warn(
2 changes: 1 addition & 1 deletion demos/TTSAndroid/README.md
@@ -1,6 +1,6 @@
# 语音合成 Java API Demo 使用指南

- 在 Android 上实现语音合成功能,此 Demo 有很好的的易用性和开放性,如在 Demo 中跑自己训练好的模型等。
+ 在 Android 上实现语音合成功能,此 Demo 有很好的易用性和开放性,如在 Demo 中跑自己训练好的模型等。

本文主要介绍语音合成 Demo 运行方法。

4 changes: 2 additions & 2 deletions demos/TTSArmLinux/front.conf
@@ -6,13 +6,13 @@
--jieba_stop_word_path=./dict/jieba/stop_words.utf8

# dict conf fastspeech2_0.4
- --seperate_tone=false
+ --separate_tone=false
--word2phone_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict
--phone2id_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
--tone2id_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict

# dict conf speedyspeech_0.5
- #--seperate_tone=true
+ #--separate_tone=true
#--word2phone_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/word2phone.dict
#--phone2id_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt
#--tone2id_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt
4 changes: 2 additions & 2 deletions demos/TTSCppFrontend/front_demo/front.conf
@@ -6,13 +6,13 @@
--jieba_stop_word_path=./front_demo/dict/jieba/stop_words.utf8

# dict conf fastspeech2_0.4
- --seperate_tone=false
+ --separate_tone=false
--word2phone_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict
--phone2id_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
--tone2id_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict

# dict conf speedyspeech_0.5
- #--seperate_tone=true
+ #--separate_tone=true
#--word2phone_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/word2phone.dict
#--phone2id_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt
#--tone2id_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt
2 changes: 1 addition & 1 deletion demos/TTSCppFrontend/front_demo/front_demo.cpp
@@ -20,7 +20,7 @@

DEFINE_string(sentence, "你好,欢迎使用语音合成服务", "Text to be synthesized");
DEFINE_string(front_conf, "./front_demo/front.conf", "Front conf file");
- // DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid");
+ // DEFINE_string(separate_tone, "true", "If true, get phoneids and tonesid");


int main(int argc, char** argv) {
6 changes: 3 additions & 3 deletions demos/TTSCppFrontend/front_demo/gentools/word2phones.py
@@ -20,7 +20,7 @@
newdict = "./dict/word_phones.dict"


- def GenPhones(initials, finals, seperate=True):
+ def GenPhones(initials, finals, separate=True):

phones = []
for c, v in zip(initials, finals):
@@ -30,9 +30,9 @@ def GenPhones(initials, finals, seperate=True):
elif c in ['zh', 'ch', 'sh', 'r']:
v = re.sub('i', 'iii', v)
if c:
- if seperate is True:
+ if separate is True:
phones.append(c + '0')
- elif seperate is False:
+ elif separate is False:
phones.append(c)
else:
print("Not sure whether phone and tone need to be separated")
8 changes: 4 additions & 4 deletions demos/TTSCppFrontend/src/front/front_interface.cpp
@@ -126,7 +126,7 @@ int FrontEngineInterface::init() {
}

// 生成音调字典(音调到音调id的映射)
- if (_seperate_tone == "true") {
+ if (_separate_tone == "true") {
if (0 != GenDict(_tone2id_path, &tone_id_map)) {
LOG(ERROR) << "Genarate tone2id dict failed";
return -1;
@@ -168,7 +168,7 @@ int FrontEngineInterface::ReadConfFile() {
_jieba_stop_word_path = conf_map["jieba_stop_word_path"];

// dict path
- _seperate_tone = conf_map["seperate_tone"];
+ _separate_tone = conf_map["separate_tone"];
_word2phone_path = conf_map["word2phone_path"];
_phone2id_path = conf_map["phone2id_path"];
_tone2id_path = conf_map["tone2id_path"];
@@ -295,7 +295,7 @@ int FrontEngineInterface::GetWordsIds(
}
}
} else { // 标点符号
- if (_seperate_tone == "true") {
+ if (_separate_tone == "true") {
phone = "sp0"; // speedyspeech
} else {
phone = "sp"; // fastspeech2
@@ -354,7 +354,7 @@ int FrontEngineInterface::Phone2Phoneid(const std::string &phone,
std::string temp_phone;
for (int i = 0; i < phone_vec.size(); i++) {
temp_phone = phone_vec[i];
- if (_seperate_tone == "true") {
+ if (_separate_tone == "true") {
phoneid->push_back(atoi(
(phone_id_map[temp_phone.substr(0, temp_phone.length() - 1)])
.c_str()));
2 changes: 1 addition & 1 deletion demos/TTSCppFrontend/src/front/front_interface.h
@@ -182,7 +182,7 @@ class FrontEngineInterface : public TextNormalizer {
std::string _jieba_idf_path;
std::string _jieba_stop_word_path;

- std::string _seperate_tone;
+ std::string _separate_tone;
std::string _word2phone_path;
std::string _phone2id_path;
std::string _tone2id_path;
2 changes: 1 addition & 1 deletion demos/speech_web/README.md
@@ -23,7 +23,7 @@ Paddle Speech Demo 是一个以 PaddleSpeech 的语音交互功能为主体开

+ ERNIE-SAT:语言-语音跨模态大模型 ERNIE-SAT 可视化展示示例,支持个性化合成,跨语言语音合成(音频为中文则输入英文文本进行合成),语音编辑(修改音频文字中间的结果)功能。 ERNIE-SAT 更多实现细节,可以参考:
+ [【ERNIE-SAT with AISHELL-3 dataset】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/ernie_sat)
- + [【ERNIE-SAT with with AISHELL3 and VCTK datasets】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3_vctk/ernie_sat)
+ + [【ERNIE-SAT with AISHELL3 and VCTK datasets】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3_vctk/ernie_sat)
+ [【ERNIE-SAT with VCTK dataset】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/ernie_sat)

运行效果:
2 changes: 1 addition & 1 deletion demos/speech_web/speech_server/main.py
@@ -260,7 +260,7 @@ async def websocket_endpoint_online(websocket: WebSocket):
# and we break the loop
if message['signal'] == 'start':
resp = {"status": "ok", "signal": "server_ready"}
- # do something at begining here
+ # do something at beginning here
# create the instance to process the audio
# connection_handler = chatbot.asr.connection_handler
connection_handler = PaddleASRConnectionHanddler(engine)
2 changes: 1 addition & 1 deletion docs/tutorial/st/st_tutorial.ipynb
@@ -62,7 +62,7 @@
"collapsed": false
},
"source": [
"# 使用Transformer进行端到端语音翻译的的基本流程\n",
"# 使用Transformer进行端到端语音翻译的基本流程\n",
"## 基础模型\n",
"由于 ASR 章节已经介绍了 Transformer 以及语音特征抽取,在此便不做过多介绍,感兴趣的同学可以去相关章节进行了解。\n",
"\n",
2 changes: 1 addition & 1 deletion docs/tutorial/tts/tts_tutorial.ipynb
@@ -464,7 +464,7 @@
"<br><center> FastSpeech2 网络结构图</center></br>\n",
"\n",
"\n",
"PaddleSpeech TTS 实现的 FastSpeech2 与论文不同的地方在于,我们使用的的是 phone 级别的 `pitch` 和 `energy`(与 FastPitch 类似),这样的合成结果可以更加**稳定**。\n",
"PaddleSpeech TTS 实现的 FastSpeech2 与论文不同的地方在于,我们使用的是 phone 级别的 `pitch` 和 `energy`(与 FastPitch 类似),这样的合成结果可以更加**稳定**。\n",
"<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/862c21456c784c41a83a308b7d9707f0810cc3b3c6f94ed48c60f5d32d0072f0\"></center>\n",
"<br><center> FastPitch 网络结构图</center></br>\n",
"\n",
2 changes: 1 addition & 1 deletion examples/librispeech/asr2/README.md
@@ -153,7 +153,7 @@ After training the model, we need to get the final model for testing and inferen
```bash
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# avg n best model
- avg.sh lastest exp/${ckpt}/checkpoints ${avg_num}
+ avg.sh latest exp/${ckpt}/checkpoints ${avg_num}
fi
```
The `avg.sh` is in the `../../../utils/` which is define in the `path.sh`.
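Here `avg.sh latest` keeps the N most recent checkpoints and averages their parameters into the final model. The core of that averaging step looks roughly like the sketch below (hypothetical helper; the actual script that `avg.sh` wraps is not shown here):

```python
import paddle

def average_checkpoints(paths):
    """Element-wise mean of several checkpoints' parameters (sketch only)."""
    avg = None
    for path in paths:
        state = paddle.load(path)
        if avg is None:
            avg = {k: v.astype('float32') / len(paths) for k, v in state.items()}
        else:
            for k, v in state.items():
                avg[k] += v.astype('float32') / len(paths)
    return avg

# paddle.save(average_checkpoints([...]), 'avg_latest.pdparams')
```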
2 changes: 1 addition & 1 deletion examples/other/mfa/local/generate_lexicon.py
@@ -48,7 +48,7 @@ def rule(C, V, R, T):
'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'.
- Erhua is is possibly applied to every finals, except for finals that already ends with 'r'.
+ Erhua is possibly applied to every finals, except for finals that already ends with 'r'.
When a syllable is impossible or does not have any characters with this pronunciation, return None
to filter it out.
2 changes: 1 addition & 1 deletion examples/tiny/asr1/README.md
@@ -37,7 +37,7 @@ It will support the way of using `--variable value` in the shell scripts.
Some local variables are set in `run.sh`.
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU.
`stage` denotes the number of stage you want the start from in the experiments.
- `stop stage` denotes the number of stage you want the stop at in the expriments.
+ `stop stage` denotes the number of stage you want the stop at in the experiments.
`conf_path` denotes the config path of the model.
`avg_num`denotes the number K of top-K models you want to average to get the final model.
`ckpt` denotes the checkpoint prefix of the model, e.g. "transformerr"
2 changes: 1 addition & 1 deletion paddlespeech/s2t/__init__.py
@@ -267,7 +267,7 @@ def to(x: paddle.Tensor, *args, **kwargs) -> paddle.Tensor:


if not hasattr(paddle.Tensor, 'to'):
logger.debug("register user to to paddle.Tensor, remove this when fixed!")
logger.debug("register user to paddle.Tensor, remove this when fixed!")
setattr(paddle.Tensor, 'to', to)
setattr(paddle.static.Variable, 'to', to)
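The hunk above registers a user-defined `to` on `paddle.Tensor` as a stopgap until the framework provides one. The pattern, reduced to a sketch (the real helper accepts more argument forms and also patches `paddle.static.Variable`):

```python
import paddle

def to(x, dtype=None):
    # Simplified stand-in: the real helper also handles device/place arguments.
    return x.astype(dtype) if dtype is not None else x

if not hasattr(paddle.Tensor, 'to'):   # only patch when paddle lacks it
    setattr(paddle.Tensor, 'to', to)
```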

2 changes: 1 addition & 1 deletion paddlespeech/s2t/frontend/augmentor/augmentation.py
@@ -45,7 +45,7 @@ class AugmentationPipeline():
samples to make the model invariant to certain types of perturbations in the
real world, improving model's generalization ability.
- The pipeline is built according the the augmentation configuration in json
+ The pipeline is built according to the augmentation configuration in json
string, e.g.
.. code-block::
2 changes: 1 addition & 1 deletion paddlespeech/s2t/io/speechbrain/sampler.py
@@ -283,7 +283,7 @@ def _get_boundaries_through_warping(
num_quantiles, )
# get quantiles using lognormal distribution
quantiles = lognorm.ppf(latent_boundaries, 1)
- # scale up to to max_batch_length
+ # scale up to max_batch_length
bucket_boundaries = quantiles * max_batch_length / quantiles[-1]
# compute resulting bucket length multipliers
length_multipliers = [
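The fixed comment sits in code that places batch-bucket boundaries at lognormal quantiles and rescales them so the largest boundary equals `max_batch_length`. A runnable sketch of that computation; how `latent_boundaries` is built is not visible in this hunk, so the evenly spaced levels below are an assumption:

```python
import numpy as np
from scipy.stats import lognorm

def lognormal_bucket_boundaries(num_quantiles, max_batch_length):
    # evenly spaced quantile levels in (0, 1), endpoints excluded (assumption)
    latent_boundaries = np.linspace(0.0, 1.0, num_quantiles + 2)[1:-1]
    quantiles = lognorm.ppf(latent_boundaries, 1)        # lognormal, shape s=1
    return quantiles * max_batch_length / quantiles[-1]  # scale up to max_batch_length

print(lognormal_bucket_boundaries(4, 1000))
```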
4 changes: 2 additions & 2 deletions paddlespeech/s2t/models/u2/u2.py
@@ -560,7 +560,7 @@ def attention_rescoring(self,
[len(hyp[0]) for hyp in hyps], place=device,
dtype=paddle.long) # (beam_size,)
hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id)
- hyps_lens = hyps_lens + 1 # Add <sos> at begining
+ hyps_lens = hyps_lens + 1 # Add <sos> at beginning
logger.debug(
f"hyps pad: {hyps_pad} {self.sos} {self.eos} {self.ignore_id}")

@@ -709,7 +709,7 @@ def forward_attention_decoder(self,
hypothesis from ctc prefix beam search and one encoder output
Args:
hyps (paddle.Tensor): hyps from ctc prefix beam search, already
- pad sos at the begining, (B, T)
+ pad sos at the beginning, (B, T)
hyps_lens (paddle.Tensor): length of each hyp in hyps, (B)
encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D)
Returns:
2 changes: 1 addition & 1 deletion paddlespeech/s2t/models/u2_st/u2_st.py
@@ -455,7 +455,7 @@ def forward_attention_decoder(
hypothesis from ctc prefix beam search and one encoder output
Args:
hyps (paddle.Tensor): hyps from ctc prefix beam search, already
- pad sos at the begining, (B, T)
+ pad sos at the beginning, (B, T)
hyps_lens (paddle.Tensor): length of each hyp in hyps, (B)
encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D)
Returns:
2 changes: 1 addition & 1 deletion paddlespeech/server/engine/asr/online/python/asr_engine.py
@@ -609,7 +609,7 @@ def rescoring(self):
dtype=paddle.long) # (beam_size,)
hyps_pad, _ = add_sos_eos(hyps_pad, self.model.sos, self.model.eos,
self.model.ignore_id)
- hyps_lens = hyps_lens + 1 # Add <sos> at begining
+ hyps_lens = hyps_lens + 1 # Add <sos> at beginning

# ctc score in ln domain
# (beam_size, max_hyps_len, vocab_size)
2 changes: 1 addition & 1 deletion paddlespeech/server/ws/asr_api.py
@@ -67,7 +67,7 @@ async def websocket_endpoint(websocket: WebSocket):
# and we break the loop
if message['signal'] == 'start':
resp = {"status": "ok", "signal": "server_ready"}
- # do something at begining here
+ # do something at beginning here
# create the instance to process the audio
#connection_handler = PaddleASRConnectionHanddler(asr_model)
connection_handler = asr_model.new_handler()
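This handshake means a streaming client sends a `start` signal and waits for `server_ready` before streaming audio. A minimal client sketch, assuming the third-party `websockets` package; the URL and exact message fields here are illustrative, not the documented protocol:

```python
import json
import websockets

async def start_session(url="ws://127.0.0.1:8090/paddlespeech/asr/streaming"):
    async with websockets.connect(url) as ws:
        await ws.send(json.dumps({"signal": "start"}))   # announce the stream
        resp = json.loads(await ws.recv())
        assert resp.get("signal") == "server_ready"      # audio chunks may follow

# run with: asyncio.run(start_session())
```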
2 changes: 1 addition & 1 deletion paddlespeech/t2s/frontend/generate_lexicon.py
@@ -45,7 +45,7 @@ def rule(C, V, R, T):
'u' in syllables when certain conditions are satisfied.
'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'.
- Erhua is is possibly applied to every finals, except for finals that already ends with 'r'.
+ Erhua is possibly applied to every finals, except for finals that already ends with 'r'.
When a syllable is impossible or does not have any characters with this pronunciation, return None
to filter it out.
"""
8 changes: 4 additions & 4 deletions paddlespeech/t2s/models/waveflow.py
@@ -236,7 +236,7 @@ def add_input(self, x_row, condition_row):
Returns:
res (Tensor):
- A row of the the residual output. shape=(batch_size, channel, 1, width)
+ A row of the residual output. shape=(batch_size, channel, 1, width)
skip (Tensor):
A row of the skip output. shape=(batch_size, channel, 1, width)
@@ -343,7 +343,7 @@ def add_input(self, x_row, condition_row):
Returns:
res (Tensor):
- A row of the the residual output. shape=(batch_size, channel, 1, width)
+ A row of the residual output. shape=(batch_size, channel, 1, width)
skip (Tensor):
A row of the skip output. shape=(batch_size, channel, 1, width)
@@ -465,7 +465,7 @@ def _start_sequence(self):
self.resnet.start_sequence()

def inverse(self, z, condition):
"""Sampling from the the distrition p(X). It is done by sample form
"""Sampling from the distrition p(X). It is done by sample form
p(Z) and transform the sample. It is a auto regressive transformation.
Args:
@@ -600,7 +600,7 @@ def forward(self, x, condition):
return z, log_det_jacobian

def inverse(self, z, condition):
"""Sampling from the the distrition p(X).
"""Sampling from the distrition p(X).
It is done by sample a ``z`` form p(Z) and transform it into ``x``.
Each Flow transform .. math:: `z_{i-1}` to .. math:: `z_{i}` in an
2 changes: 1 addition & 1 deletion paddlespeech/t2s/modules/transformer/lightconv.py
@@ -110,7 +110,7 @@ def forward(self, query, key, value, mask):
(batch, time1, time2) mask
Return:
- Tensor: ouput. (batch, time1, d_model)
+ Tensor: output. (batch, time1, d_model)
"""
# linear -> GLU -> lightconv -> linear
4 changes: 2 additions & 2 deletions paddlespeech/vector/exps/ecapa_tdnn/train.py
@@ -51,7 +51,7 @@ def main(args, config):
# stage0: set the training device, cpu or gpu
paddle.set_device(args.device)

- # stage1: we must call the paddle.distributed.init_parallel_env() api at the begining
+ # stage1: we must call the paddle.distributed.init_parallel_env() api at the beginning
paddle.distributed.init_parallel_env()
nranks = paddle.distributed.get_world_size()
rank = paddle.distributed.get_rank()
@@ -146,7 +146,7 @@ def main(args, config):
timer.start()

for epoch in range(start_epoch + 1, config.epochs + 1):
- # at the begining, model must set to train mode
+ # at the beginning, model must set to train mode
model.train()

avg_loss = 0
2 changes: 1 addition & 1 deletion paddlespeech/vector/exps/ge2e/preprocess.py
@@ -42,7 +42,7 @@
parser.add_argument(
"--skip_existing",
action="store_true",
help="Whether to skip ouput files with the same name. Useful if this script was interrupted."
help="Whether to skip output files with the same name. Useful if this script was interrupted."
)
parser.add_argument(
"--no_trim",
2 changes: 1 addition & 1 deletion speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
@@ -2078,7 +2078,7 @@ def _infer_PythonOp(self, node):
output_tensor_ranks = get_attribute(node, 'output_tensor_ranks')
assert output_tensor_ranks

- # set the context output seperately.
+ # set the context output separately.
# The first output is autograd's context.
vi = self.known_vi_[node.output[0]]
vi.CopyFrom(
2 changes: 1 addition & 1 deletion speechx/speechx/frontend/audio/db_norm.cc
@@ -76,7 +76,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
if (gain > opts_.max_gain_db) {
LOG(ERROR)
<< "Unable to normalize segment to " << opts_.target_db << "dB,"
<< "because the the probable gain have exceeds opts_.max_gain_db"
<< "because the probable gain has exceeded opts_.max_gain_db"
<< opts_.max_gain_db << "dB.";
return false;
}
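The corrected message comes from decibel normalization: the normalizer computes the gain needed to bring a segment to `target_db` and fails when that gain would exceed `opts_.max_gain_db`. The same check in a NumPy sketch (default values are illustrative, not the PaddleSpeech API):

```python
import numpy as np

def normalize_to_db(wave, target_db=-20.0, max_gain_db=300.0):
    rms_db = 10.0 * np.log10(np.mean(wave ** 2) + 1e-20)  # current level in dB
    gain = target_db - rms_db
    if gain > max_gain_db:
        raise ValueError(f"unable to reach {target_db} dB: required gain "
                         f"{gain:.1f} dB exceeds max_gain_db={max_gain_db}")
    return wave * 10.0 ** (gain / 20.0)   # dB gain -> amplitude scale
```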
2 changes: 1 addition & 1 deletion speechx/speechx/kaldi/base/kaldi-types.h
@@ -40,7 +40,7 @@ typedef float BaseFloat;
#include <stdint.h>

// for discussion on what to do if you need compile kaldi
- // without OpenFST, see the bottom of this this file
+ // without OpenFST, see the bottom of this file

#ifndef COMPILE_WITHOUT_OPENFST

2 changes: 1 addition & 1 deletion speechx/speechx/kaldi/feat/pitch-functions.cc
@@ -746,7 +746,7 @@ OnlinePitchFeatureImpl::OnlinePitchFeatureImpl(
Vector<BaseFloat> lags_offset(lags_);
// lags_offset equals lags_ (which are the log-spaced lag values we want to
// measure the NCCF at) with nccf_first_lag_ / opts.resample_freq subtracted
- // from each element, so we can treat the measured NCCF values as as starting
+ // from each element, so we can treat the measured NCCF values as starting
// from sample zero in a signal that starts at the point start /
// opts.resample_freq. This is necessary because the ArbitraryResample code
// assumes that the input signal starts from sample zero.