# Commit note captured with this file:
#   - generate samples for https://k2-fsa.github.io/sherpa/onnx/tts/all/
#   - provide int8 model for kokoro v0.19 kokoro-int8-en-v0_19.tar.bz2
# File viewer metadata captured with this file: 27 lines, 951 B, Python, executable file.
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)


def get_vocab():
    """Build the symbol-to-id table, following the upstream file linked below.

    The symbol inventory is the concatenation, in this order, of the pad
    symbol, the punctuation characters, the ASCII letters and the IPA
    letters. A symbol's id is its position in that concatenated sequence.

    Returns:
        A dict mapping each single-character symbol to its integer id.
        Keys appear in first-occurrence order. A symbol that occurs more
        than once in the inventory keeps the id of its LAST occurrence.
    """
    # https://huggingface.co/hexgrad/kLegacy/blob/main/v0.19/kokoro.py#L75
    _pad = "$"
    _punctuation = ';:,.!?¡¿—…"«»“” '
    _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

    symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)

    # Later duplicates overwrite earlier ones (the apostrophe appears twice in
    # _letters_ipa), so the resulting ids are not guaranteed to be contiguous.
    return {symbol: idx for idx, symbol in enumerate(symbols)}


def main():
    """Write the vocabulary from ``get_vocab()`` to ``tokens.txt``.

    The file is created (or overwritten) in the current working directory,
    encoded as UTF-8. Each line has the form ``<token> <id>``, in the
    iteration order of the dict returned by ``get_vocab()``.
    """
    token2id = get_vocab()

    with open("tokens.txt", "w", encoding="utf-8") as f:
        # Tokens are written verbatim, so a whitespace token produces a line
        # that begins with whitespace; readers must not strip lines blindly.
        f.writelines(f"{s} {i}\n" for s, i in token2id.items())


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()