update text frontend

PaddlePaddle · Nov 26, 2021 · dad1cbb · dad1cbb
1 parent b6ade97
commit dad1cbb
Show file tree

Hide file tree

Showing 13 changed files with 70 additions and 12 deletions.
diff --git a/demos/style_fs2/style_syn.py b/demos/style_fs2/style_syn.py
@@ -34,7 +34,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
  sentences = []
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  with open(args.phones_dict, "r") as f:

diff --git a/examples/ljspeech/voc1/README.md b/examples/ljspeech/voc1/README.md
@@ -137,4 +137,4 @@ pwg_ljspeech_ckpt_0.5
 └── pwg_stats.npy # statistics used to normalize spectrogram when training parallel wavegan
 ```
 ## Acknowledgement
-We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.
+We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.
diff --git a/paddlespeech/t2s/exps/fastspeech2/inference.py b/paddlespeech/t2s/exps/fastspeech2/inference.py
@@ -82,7 +82,9 @@ def main():
 
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  for utt_id, sentence in sentences:

diff --git a/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py b/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
@@ -37,7 +37,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
  sentences = []
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  with open(args.phones_dict, "r") as f:

diff --git a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
@@ -40,7 +40,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
  sentences = []
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  with open(args.phones_dict, "r") as f:

diff --git a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
@@ -40,7 +40,9 @@ def evaluate(args, fastspeech2_config, melgan_config):
  sentences = []
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  with open(args.phones_dict, "r") as f:

diff --git a/paddlespeech/t2s/exps/speedyspeech/inference.py b/paddlespeech/t2s/exps/speedyspeech/inference.py
@@ -87,7 +87,9 @@ def main():
 
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  for utt_id, sentence in sentences:

diff --git a/paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py b/paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py
@@ -40,7 +40,9 @@ def evaluate(args, speedyspeech_config, pwg_config):
  sentences = []
  with open(args.text, 'rt') as f:
  for line in f:
- utt_id, sentence = line.strip().split()
+ items = line.strip().split()
+ utt_id = items[0]
+ sentence = ",".join(items[1:])
  sentences.append((utt_id, sentence))
 
  with open(args.phones_dict, "r") as f:

diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -149,9 +149,14 @@ def _merge_erhua(self,
  if word not in self.must_erhua and (word in self.not_erhua or
  pos in {"a", "j", "nr"}):
  return initials, finals
+ # Return unchanged when finals and word differ in length (e.g. for "……")
+ if len(finals) != len(word):
+ return initials, finals
+
+ assert len(finals) == len(word)
+
  new_initials = []
  new_finals = []
- assert len(finals) == len(word)
  for i, phn in enumerate(finals):
  if i == len(finals) - 1 and word[i] == "儿" and phn in {
  "er2", "er5"

diff --git a/paddlespeech/t2s/frontend/zh_normalization/chronology.py b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
@@ -32,6 +32,15 @@ def _time_num2str(num_string: str) -> str:
  r':([0-5][0-9])'
  r'(:([0-5][0-9]))?')
 
+# Time range, e.g. 8:30-12:30
+RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
+ r':([0-5][0-9])'
+ r'(:([0-5][0-9]))?'
+ r'(~|-)'
+ r'([0-1]?[0-9]|2[0-3])'
+ r':([0-5][0-9])'
+ r'(:([0-5][0-9]))?')
+
 
 def replace_time(match) -> str:
  """
@@ -42,15 +51,32 @@ def replace_time(match) -> str:
  ----------
  str
  """
+
+ is_range = len(match.groups()) > 5
+
  hour = match.group(1)
  minute = match.group(2)
  second = match.group(4)
 
+ if is_range:
+ hour_2 = match.group(6)
+ minute_2 = match.group(7)
+ second_2 = match.group(9)
+
  result = f"{num2str(hour)}点"
  if minute.lstrip('0'):
  result += f"{_time_num2str(minute)}分"
  if second and second.lstrip('0'):
  result += f"{_time_num2str(second)}秒"
+
+ if is_range:
+ result += "至"
+ result += f"{num2str(hour_2)}点"
+ if minute_2.lstrip('0'):
+ result += f"{_time_num2str(minute_2)}分"
+ if second_2 and second_2.lstrip('0'):
+ result += f"{_time_num2str(second_2)}秒"
+
  return result
 
 

diff --git a/paddlespeech/t2s/frontend/zh_normalization/phonecode.py b/paddlespeech/t2s/frontend/zh_normalization/phonecode.py
@@ -26,16 +26,19 @@
 RE_TELEPHONE = re.compile(
  r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{7,8})(?!\d)")
 
+# Nationwide unified service numbers, which start with 400
+RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")
+
 
 def phone2str(phone_string: str, mobile=True) -> str:
  if mobile:
  sp_parts = phone_string.strip('+').split()
- result = ''.join(
+ result = '，'.join(
  [verbalize_digit(part, alt_one=True) for part in sp_parts])
  return result
  else:
  sil_parts = phone_string.split('-')
- result = ''.join(
+ result = '，'.join(
  [verbalize_digit(part, alt_one=True) for part in sil_parts])
  return result
 

diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
@@ -18,6 +18,7 @@
 from .chronology import RE_DATE
 from .chronology import RE_DATE2
 from .chronology import RE_TIME
+from .chronology import RE_TIME_RANGE
 from .chronology import replace_date
 from .chronology import replace_date2
 from .chronology import replace_time
@@ -40,6 +41,7 @@
 from .num import replace_positive_quantifier
 from .num import replace_range
 from .phonecode import RE_MOBILE_PHONE
+from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
 from .phonecode import RE_TELEPHONE
 from .phonecode import replace_mobile
 from .phonecode import replace_phone
@@ -76,12 +78,19 @@ def normalize_sentence(self, sentence: str) -> str:
  # number related NSW verbalization
  sentence = RE_DATE.sub(replace_date, sentence)
  sentence = RE_DATE2.sub(replace_date2, sentence)
+
+ # Handle time ranges first; RE_TIME would otherwise rewrite each endpoint separately
+ sentence = RE_TIME_RANGE.sub(replace_time, sentence)
  sentence = RE_TIME.sub(replace_time, sentence)
+
  sentence = RE_TEMPERATURE.sub(replace_temperature, sentence)
  sentence = RE_FRAC.sub(replace_frac, sentence)
  sentence = RE_PERCENTAGE.sub(replace_percentage, sentence)
  sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence)
+
  sentence = RE_TELEPHONE.sub(replace_phone, sentence)
+ sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)
+
  sentence = RE_RANGE.sub(replace_range, sentence)
  sentence = RE_INTEGER.sub(replace_negative_num, sentence)
  sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
@@ -94,5 +103,6 @@ def normalize_sentence(self, sentence: str) -> str:
 
  def normalize(self, text: str) -> List[str]:
  sentences = self._split(text)
+
  sentences = [self.normalize_sentence(sent) for sent in sentences]
  return sentences
diff --git a/paddlespeech/t2s/models/fastspeech2/fastspeech2.py b/paddlespeech/t2s/models/fastspeech2/fastspeech2.py
@@ -307,7 +307,7 @@ def __init__(
  num_embeddings=idim,
  embedding_dim=adim,
  padding_idx=self.padding_idx)
- 
+
  if encoder_type == "transformer":
  print("encoder_type is transformer")
  self.encoder = TransformerEncoder(