diff --git a/scripts/kokoro/v1.0/generate_lexicon.py b/scripts/kokoro/v1.0/generate_lexicon.py
index a7ad46f5..aa37911a 100755
--- a/scripts/kokoro/v1.0/generate_lexicon.py
+++ b/scripts/kokoro/v1.0/generate_lexicon.py
@@ -2,10 +2,21 @@
 # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
 
 import json
-from pypinyin import phrases_dict, pinyin_dict
-from misaki import zh
 from typing import List, Tuple
 
+from misaki import zh
+from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
+
+user_dict = {
+    "还田": [["huan2"], ["tian2"]],
+    "行长": [["hang2"], ["zhang3"]],
+    "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
+}
+
+load_phrases_dict(user_dict)
+
+phrases_dict.phrases_dict.update(**user_dict)
+
 
 def generate_english_lexicon(kind: str):
     assert kind in ("us", "gb"), kind
@@ -59,11 +70,13 @@ def generate_chinese_lexicon():
         if not (0x4E00 <= key <= 0x9FFF):
             continue
         w = chr(key)
-        tokens: str = g2p(w)
+        tokens: str = g2p.word2ipa(w)
+        tokens = tokens.replace(chr(815), "")
         lexicon.append((w, tokens))
 
     for key in phrases:
-        tokens: str = g2p(key)
+        tokens: str = g2p.word2ipa(key)
+        tokens = tokens.replace(chr(815), "")
         lexicon.append((key, tokens))
     return lexicon
 
diff --git a/scripts/kokoro/v1.0/run.sh b/scripts/kokoro/v1.0/run.sh
index 2c00305a..d6b3584d 100755
--- a/scripts/kokoro/v1.0/run.sh
+++ b/scripts/kokoro/v1.0/run.sh
@@ -114,11 +114,6 @@ if [ ! -f ./lexicon-zh.txt ]; then
   ./generate_lexicon.py
 fi
 
-grep '还钱' ./lexicon-zh.txt
-sed -i.bak 's/还钱 x a i/还钱 x w a/' ./lexicon-zh.txt
-rm -v ./lexicon-zh.txt.bak
-grep '还钱' ./lexicon-zh.txt
-
 if [ ! -f ./voices.bin ]; then
   ./generate_voices_bin.py
 fi