Fix generating Chinese lexicon for Kokoro TTS 1.0 (#1888)
This commit is contained in:
@@ -2,10 +2,21 @@
|
|||||||
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
|
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from pypinyin import phrases_dict, pinyin_dict
|
|
||||||
from misaki import zh
|
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from misaki import zh
|
||||||
|
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
|
||||||
|
|
||||||
|
user_dict = {
|
||||||
|
"还田": [["huan2"], ["tian2"]],
|
||||||
|
"行长": [["hang2"], ["zhang3"]],
|
||||||
|
"银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
|
||||||
|
}
|
||||||
|
|
||||||
|
load_phrases_dict(user_dict)
|
||||||
|
|
||||||
|
phrases_dict.phrases_dict.update(**user_dict)
|
||||||
|
|
||||||
|
|
||||||
def generate_english_lexicon(kind: str):
|
def generate_english_lexicon(kind: str):
|
||||||
assert kind in ("us", "gb"), kind
|
assert kind in ("us", "gb"), kind
|
||||||
@@ -59,11 +70,13 @@ def generate_chinese_lexicon():
|
|||||||
if not (0x4E00 <= key <= 0x9FFF):
|
if not (0x4E00 <= key <= 0x9FFF):
|
||||||
continue
|
continue
|
||||||
w = chr(key)
|
w = chr(key)
|
||||||
tokens: str = g2p(w)
|
tokens: str = g2p.word2ipa(w)
|
||||||
|
tokens = tokens.replace(chr(815), "")
|
||||||
lexicon.append((w, tokens))
|
lexicon.append((w, tokens))
|
||||||
|
|
||||||
for key in phrases:
|
for key in phrases:
|
||||||
tokens: str = g2p(key)
|
tokens: str = g2p.word2ipa(key)
|
||||||
|
tokens = tokens.replace(chr(815), "")
|
||||||
lexicon.append((key, tokens))
|
lexicon.append((key, tokens))
|
||||||
return lexicon
|
return lexicon
|
||||||
|
|
||||||
|
|||||||
@@ -114,11 +114,6 @@ if [ ! -f ./lexicon-zh.txt ]; then
|
|||||||
./generate_lexicon.py
|
./generate_lexicon.py
|
||||||
fi
|
fi
|
||||||
|
|
||||||
grep '还钱' ./lexicon-zh.txt
|
|
||||||
sed -i.bak 's/还钱 x a i/还钱 x w a/' ./lexicon-zh.txt
|
|
||||||
rm -v ./lexicon-zh.txt.bak
|
|
||||||
grep '还钱' ./lexicon-zh.txt
|
|
||||||
|
|
||||||
if [ ! -f ./voices.bin ]; then
|
if [ ! -f ./voices.bin ]; then
|
||||||
./generate_voices_bin.py
|
./generate_voices_bin.py
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user