Refactor kokoro export (#2302)

- generate samples for https://k2-fsa.github.io/sherpa/onnx/tts/all/
- provide int8 model for kokoro v0.19 kokoro-int8-en-v0_19.tar.bz2
This commit is contained in:
Fangjun Kuang
2025-06-18 20:30:10 +08:00
committed by GitHub
parent 3878170991
commit 59d118c256
18 changed files with 494 additions and 215 deletions

View File

@@ -0,0 +1,26 @@
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
def get_vocab():
# https://huggingface.co/hexgrad/kLegacy/blob/main/v0.19/kokoro.py#L75
_pad = "$"
_punctuation = ';:,.!?¡¿—…"«»“” '
_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'"
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
dicts = {}
for i in range(len((symbols))):
dicts[symbols[i]] = i
return dicts
def main():
token2id = get_vocab()
with open("tokens.txt", "w", encoding="utf-8") as f:
for s, i in token2id.items():
f.write(f"{s} {i}\n")
if __name__ == "__main__":
main()