[fast_inference] Revert the mask strategy, reduce the impact of padding, expose options, sync code #986

Merged Apr 19, 2024 · 30 commits
Changes shown from 1 commit (97f304c, "Adjust normalize and g2p logic")

Commits (30)
6c2a7f1
Update README
Lion-Wu Mar 17, 2024
a495540
Merge pull request #790 from Lion-Wu/doc/README
RVC-Boss Mar 17, 2024
ca57a01
Optimize-English-G2P
KamioRinn Mar 18, 2024
8fdc7e3
docs: change awkward expression
pengooseDev Mar 18, 2024
a6e4b38
docs: update Changelog_KO.md
pengooseDev Mar 18, 2024
f740949
Merge pull request #804 from KamioRinn/Optimize-English-G2P
RVC-Boss Mar 19, 2024
06ec491
Merge pull request #809 from pengooseDev/main
RVC-Boss Mar 19, 2024
e3d3c32
Fix CN punctuation in EN, add 's match
KamioRinn Mar 19, 2024
b451372
Merge pull request #812 from KamioRinn/Optimize-English-G2P
RVC-Boss Mar 19, 2024
97f304c
Adjust normalize and g2p logic
KamioRinn Mar 19, 2024
7bc0836
Merge pull request #821 from KamioRinn/Optimize-English-G2P
RVC-Boss Mar 20, 2024
2cf8e79
Update zh_CN.json
Yuan-ManX Mar 21, 2024
41431d0
Merge pull request #841 from Yuan-ManX/zh_CN-3
RVC-Boss Mar 21, 2024
17aff21
Update README (#827)
XXXXRT666 Mar 21, 2024
6ccfd36
Fix English heteronyms, adjust dictionary hot reloading, add name matching (#869)
KamioRinn Mar 25, 2024
4afecd1
Make API Great Again (#894)
KamioRinn Mar 30, 2024
2885f96
Update README (#895)
Lion-Wu Mar 31, 2024
1fca6b2
fix typo s/Licence /License (#904)
digger-yu Apr 1, 2024
8582131
fix reformat cmd (#917)
SapphireLab Apr 3, 2024
4e43f60
Update README.md
RVC-Boss Apr 6, 2024
a3c4e04
Normalize chinese arithmetic operations (#947)
KamioRinn Apr 12, 2024
f0138ea
Change the mask strategy used in training and inference to fix repeated speech when batch_size > 1
ChasonJiang Apr 12, 2024
b1e9593
Merge branch 'main' of https://github.com/RVC-Boss/GPT-SoVITS into sy…
ChasonJiang Apr 12, 2024
4ec2c5c
Sync code from the main branch and add a "keep random" option
ChasonJiang Apr 12, 2024
b162848
Fix missing uvr5 models when running colab_webui.ipynb in Colab (#968)
shadow01a Apr 15, 2024
e1ba8d7
[ASR] Fix FasterWhisper failing to traverse the input path (#956)
SapphireLab Apr 15, 2024
302cf08
Merge branch 'main' of https://github.com/RVC-Boss/GPT-SoVITS into sy…
ChasonJiang Apr 16, 2024
ef1cd01
Revert the mask strategy
ChasonJiang Apr 16, 2024
0a0e363
Merge branch 'fast_inference_' of https://github.com/RVC-Boss/GPT-SoV…
ChasonJiang Apr 16, 2024
c930f3d
Remove unused comments
ChasonJiang Apr 16, 2024
Adjust normalize and g2p logic
KamioRinn committed Mar 19, 2024
commit 97f304c2bfa5bb185726588b3b9b44a0f797ae13
107 changes: 85 additions & 22 deletions GPT_SoVITS/text/english.py
@@ -8,6 +8,13 @@

from text import symbols

import unicodedata
from builtins import str as unicode
from g2p_en.expand import normalize_numbers
from nltk.tokenize import TweetTokenizer
word_tokenize = TweetTokenizer().tokenize
from nltk import pos_tag

current_file_path = os.path.dirname(__file__)
CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
CMU_DICT_FAST_PATH = os.path.join(current_file_path, "cmudict-fast.rep")
@@ -188,9 +195,6 @@ def get_dict():
return g2p_dict


eng_dict = get_dict()


def text_normalize(text):
# todo: eng text normalize
# adapt Chinese punctuation and the punctuation expected by g2p_en
@@ -204,6 +208,16 @@ def text_normalize(text):
for p, r in rep_map.items():
text = re.sub(p, r, text)

# text formatting borrowed from g2p_en
# with added support for uppercase input
text = unicode(text)
text = normalize_numbers(text)
text = ''.join(char for char in unicodedata.normalize('NFD', text)
if unicodedata.category(char) != 'Mn') # Strip accents
text = re.sub("[^ A-Za-z'.,?!\-]", "", text)
text = re.sub(r"(?i)i\.e\.", "that is", text)
text = re.sub(r"(?i)e\.g\.", "for example", text)

return text


@@ -220,30 +234,85 @@ def __init__(self):
for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
del self.cmu[word.lower()]

# "A" 落单不读 "AH0" 读 "EY1"
self.cmu['a'] = [['EY1']]


def predict(self, word):
# lowercase OOV words of length <= 3 are spelled out letter by letter
def __call__(self, text):
# tokenization
words = word_tokenize(text)
tokens = pos_tag(words) # tuples of (word, tag)

# steps
prons = []
for o_word, pos in tokens:
# reproduce g2p_en's lowercasing logic
word = o_word.lower()

if re.search("[a-z]", word) is None:
pron = [word]
# handle single letters first
elif len(word) == 1:
# fix the pronunciation of a standalone "A"; the original-case o_word is needed to check for uppercase
if o_word == "A":
pron = ['EY1']
else:
pron = self.cmu[word][0]
# original g2p_en homograph handling
elif word in self.homograph2features: # Check homograph
pron1, pron2, pos1 = self.homograph2features[word]
if pos.startswith(pos1):
pron = pron1
else:
pron = pron2
else:
# recursive lookup and prediction
pron = self.qryword(word)

prons.extend(pron)
prons.extend([" "])

return prons[:-1]


def qryword(self, word):
# dictionary lookup, except for single letters
if len(word) > 1 and word in self.cmu: # lookup CMU dict
return self.cmu[word][0]

# OOV words of length <= 3 are spelled out letter by letter
if (len(word) <= 3):
return [phone for w in word for phone in self(w)]
phones = []
for w in word:
# fix the pronunciation of a standalone "a"; no uppercase case here
if w == "a":
phones.extend(['EY1'])
else:
phones.extend(self.cmu[w][0])
return phones

# try to split off a possessive 's
if re.match(r"^([a-z]+)('s)$", word):
phone = self(word[:-2])
phone.extend(['Z'])
return phone
phones = self.qryword(word[:-2])
# after the voiceless consonants P T K F TH HH, 's is pronounced ['S']
if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']:
phones.extend(['S'])
# after the sibilants S Z SH ZH CH JH, 's is pronounced ['IH1', 'Z'] or ['AH0', 'Z']
elif phones[-1] in ['S', 'Z', 'SH', 'ZH', 'CH', 'JH']:
phones.extend(['AH0', 'Z'])
# after the voiced consonants B D G DH V M N NG L R W Y, 's is pronounced ['Z']
# AH0 AH1 AH2 EY0 EY1 EY2 AE0 AE1 AE2 EH0 EH1 EH2 OW0 OW1 OW2 UH0 UH1 UH2 IY0 IY1 IY2 AA0 AA1 AA2 AO0 AO1 AO2
# ER ER0 ER1 ER2 UW0 UW1 UW2 AY0 AY1 AY2 AW0 AW1 AW2 OY0 OY1 OY2 IH IH0 IH1 IH2: after these vowel endings, 's is also pronounced ['Z']
else:
phones.extend(['Z'])
return phones

# try word segmentation to handle compound words
comps = wordsegment.segment(word.lower())

# if segmentation fails, fall back to the base predictor
if len(comps)==1:
return super().predict(word)
return self.predict(word)

# if it can be segmented, handle each component recursively
return [phone for comp in comps for phone in self(comp)]
return [phone for comp in comps for phone in self.qryword(comp)]


_g2p = en_G2p()
@@ -258,12 +327,6 @@ def g2p(text):


if __name__ == "__main__":
# print(get_dict())
print(g2p("hello"))
print(g2p("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder."))
# all_phones = set()
# for k, syllables in eng_dict.items():
# for group in syllables:
# for ph in group:
# all_phones.add(ph)
# print(all_phones)
print(g2p(text_normalize("e.g. I used openai's AI tool to draw a picture.")))
print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder.")))