Add Kokoro v1.1-zh (#1942)

This commit is contained in:
Fangjun Kuang
2025-02-28 15:47:59 +08:00
committed by GitHub
parent f5dfcf8d2f
commit dfcbc8d40b
20 changed files with 897 additions and 61 deletions

View File

@@ -2,11 +2,6 @@
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
import argparse
import json
from pathlib import Path
import numpy as np
import onnx
import torch

View File

@@ -4,19 +4,6 @@
import json
from typing import List, Tuple
from misaki import zh
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
# Pronunciation overrides: each word maps to a list holding one
# single-element list per character. The syllables end in a digit
# (presumably the pinyin tone number -- confirm against pypinyin's docs).
user_dict = {
"还田": [["huan2"], ["tian2"]],
"行长": [["hang2"], ["zhang3"]],
"银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
}
# Register the overrides through pypinyin's loader.
load_phrases_dict(user_dict)
# Also insert them into the phrase table that generate_chinese_lexicon()
# iterates over.
# NOTE(review): presumably load_phrases_dict() alone does not modify this
# table -- confirm against the installed pypinyin version.
phrases_dict.phrases_dict.update(**user_dict)
def generate_english_lexicon(kind: str):
assert kind in ("us", "gb"), kind
@@ -59,28 +46,6 @@ def generate_english_lexicon(kind: str):
return list(user_defined_lower.items()) + list(lexicon.items())
def generate_chinese_lexicon():
    """Build (word, IPA string) pairs for single characters and phrases.

    Keys of pypinyin's character table whose code point lies in
    U+4E00..U+9FFF are converted first, followed by every key of
    pypinyin's phrase table. Each entry is passed through
    ``ZHG2P.word2ipa`` and then has every U+032F character removed.

    Returns:
      A list of ``(word, tokens)`` tuples: characters first, then phrases.
    """
    char_table = pinyin_dict.pinyin_dict
    phrase_table = phrases_dict.phrases_dict
    converter = zh.ZHG2P()

    # chr(815) is U+032F (COMBINING INVERTED BREVE BELOW).
    stripped_mark = chr(815)

    def to_ipa(text: str) -> str:
        return converter.word2ipa(text).replace(stripped_mark, "")

    # The character table is keyed by integer code points.
    single_chars = [
        chr(code_point)
        for code_point in char_table
        if 0x4E00 <= code_point <= 0x9FFF
    ]

    entries = [(char, to_ipa(char)) for char in single_chars]
    entries.extend((phrase, to_ipa(phrase)) for phrase in phrase_table)
    return entries
def save(filename: str, lexicon: List[Tuple[str, str]]):
with open(filename, "w", encoding="utf-8") as f:
for word, phones in lexicon:
@@ -91,11 +56,9 @@ def save(filename: str, lexicon: List[Tuple[str, str]]):
def main():
    """Generate the US-English, GB-English and Chinese lexicons and save them.

    All three lexicons are generated before the first file is written.
    """
    outputs = [
        ("lexicon-us-en.txt", generate_english_lexicon("us")),
        ("lexicon-gb-en.txt", generate_english_lexicon("gb")),
        ("lexicon-zh.txt", generate_chinese_lexicon()),
    ]
    for filename, lexicon in outputs:
        save(filename, lexicon)
if __name__ == "__main__":

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
from typing import List, Tuple
from misaki import zh
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
# Pronunciation overrides: each word maps to a list holding one
# single-element list per character. The syllables end in a digit
# (presumably the pinyin tone number -- confirm against pypinyin's docs).
user_dict = {
"还田": [["huan2"], ["tian2"]],
"行长": [["hang2"], ["zhang3"]],
"银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
}
# Register the overrides through pypinyin's loader.
load_phrases_dict(user_dict)
# Also insert them into the phrase table that generate_chinese_lexicon()
# iterates over.
# NOTE(review): presumably load_phrases_dict() alone does not modify this
# table -- confirm against the installed pypinyin version.
phrases_dict.phrases_dict.update(**user_dict)
def generate_chinese_lexicon():
    """Build (word, IPA string) pairs for single characters and phrases.

    Keys of pypinyin's character table whose code point lies in
    U+4E00..U+9FFF are converted first, followed by every key of
    pypinyin's phrase table. Each entry is passed through
    ``ZHG2P.word2ipa`` and then has every U+032F character removed.

    Returns:
      A list of ``(word, tokens)`` tuples: characters first, then phrases.
    """
    char_table = pinyin_dict.pinyin_dict
    phrase_table = phrases_dict.phrases_dict
    converter = zh.ZHG2P()

    # chr(815) is U+032F (COMBINING INVERTED BREVE BELOW).
    stripped_mark = chr(815)

    def to_ipa(text: str) -> str:
        return converter.word2ipa(text).replace(stripped_mark, "")

    # The character table is keyed by integer code points.
    single_chars = [
        chr(code_point)
        for code_point in char_table
        if 0x4E00 <= code_point <= 0x9FFF
    ]

    entries = [(char, to_ipa(char)) for char in single_chars]
    entries.extend((phrase, to_ipa(phrase)) for phrase in phrase_table)
    return entries
def save(filename: str, lexicon: List[Tuple[str, str]]) -> None:
    """Write a lexicon to ``filename`` as UTF-8 text, one entry per line.

    Each line holds the word, one space, then the characters of its phone
    string separated by single spaces.

    Args:
      filename: Path of the output file; an existing file is overwritten.
      lexicon: ``(word, phones)`` pairs, written in the order given.
    """
    with open(filename, "w", encoding="utf-8") as f:
        # str.join() already iterates a string character by character, so
        # no intermediate list() copy is needed. writelines() consumes the
        # generator without a separate Python-level write() per entry.
        f.writelines(f"{word} {' '.join(phones)}\n" for word, phones in lexicon)
def main():
    """Generate the Chinese lexicon and write it to lexicon-zh.txt."""
    save("lexicon-zh.txt", generate_chinese_lexicon())
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -111,7 +111,11 @@ if [ ! -f ./tokens.txt ]; then
fi
# Run the lexicon generator script(s) only when lexicon-zh.txt is not
# already present in the current directory.
if [ ! -f ./lexicon-zh.txt ]; then
./generate_lexicon.py
./generate_lexicon_zh.py
fi
# Run generate_lexicon_en.py when either lexicon-us-en.txt or
# lexicon-gb-en.txt is missing from the current directory.
if [[ ! -f ./lexicon-us-en.txt || ! -f ./lexicon-gb-en.txt ]]; then
./generate_lexicon_en.py
fi
if [ ! -f ./voices.bin ]; then

View File

@@ -10,8 +10,6 @@ import jieba
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch
from misaki import zh
try:
from piper_phonemize import phonemize_espeak
@@ -114,7 +112,6 @@ class OnnxModel:
def __call__(self, text: str, voice: str):
punctuations = ';:,.!?-…()"“”'
text = text.lower()
g2p = zh.ZHG2P()
tokens = ""