From 1c3d2cb89ef2d3bbef9c166980aa30d48d205134 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20An=20=EF=BC=88An=20Hongliang=EF=BC=89?=
 <david.95@live.cn>
Date: Mon, 21 Nov 2022 15:25:10 +0800
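Subject: [PATCH] Handle full-width characters in zh normalization (#2661)

Key the full-width -> half-width tables (F2H_ASCII_LETTERS, F2H_DIGITS,
F2H_PUNCTUATIONS) by integer code point instead of by one-character
string. str.translate() indexes its table by Unicode ordinal, so
int-keyed tables are the form it requires. The H2F_* tables are derived
by inverting the F2H_* tables and therefore become int -> int as well;
code that indexes any of these tables with str keys must be updated.

In _post_replace, also map the full-width tilde (U+FF5E) to the same
character as '~', and the circled digits U+2460..U+2469 to the Chinese
numerals for one through ten.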

---
 .../t2s/frontend/zh_normalization/constants.py        |  6 +++---
 .../frontend/zh_normalization/text_normlization.py    | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/paddlespeech/t2s/frontend/zh_normalization/constants.py b/paddlespeech/t2s/frontend/zh_normalization/constants.py
index 5d2b0b34ea3..6423ad74a5c 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/constants.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/constants.py
@@ -19,7 +19,7 @@
 # 全角半角转换
 # 英文字符全角 -> 半角映射表 (num: 52)
 F2H_ASCII_LETTERS = {
-    chr(ord(char) + 65248): char
+    ord(char) + 65248: ord(char)
     for char in string.ascii_letters
 }
 
@@ -27,12 +27,12 @@
 H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}
 
 # 数字字符全角 -> 半角映射表 (num: 10)
-F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
+F2H_DIGITS = {ord(char) + 65248: ord(char) for char in string.digits}
 # 数字字符半角 -> 全角映射表
 H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
 
 # 标点符号全角 -> 半角映射表 (num: 32)
-F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
+F2H_PUNCTUATIONS = {ord(char) + 65248: ord(char) for char in string.punctuation}
 # 标点符号半角 -> 全角映射表
 H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
 
diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
index 8f8e3b07d12..1942e666126 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
@@ -74,6 +74,17 @@ def _split(self, text: str, lang="zh") -> List[str]:
     def _post_replace(self, sentence: str) -> str:
         sentence = sentence.replace('/', '每')
         sentence = sentence.replace('~', '至')
+        sentence = sentence.replace('～', '至')
+        sentence = sentence.replace('①', '一')
+        sentence = sentence.replace('②', '二')
+        sentence = sentence.replace('③', '三')
+        sentence = sentence.replace('④', '四')
+        sentence = sentence.replace('⑤', '五')
+        sentence = sentence.replace('⑥', '六')
+        sentence = sentence.replace('⑦', '七')
+        sentence = sentence.replace('⑧', '八')
+        sentence = sentence.replace('⑨', '九')
+        sentence = sentence.replace('⑩', '十')
 
         return sentence