#!/usr/bin/env python3
|
|
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
|
|
|
|
|
|
import onnx
|
|
import torch
|
|
|
|
from generate_voices_bin import speaker2id
|
|
|
|
|
|
def main():
    """Replace the metadata of ``./kokoro.onnx`` and save it in place.

    Steps:
      1. Load the ONNX model and one voice style tensor.
      2. Build the metadata entries (speaker maps, style tensor shape, etc.).
      3. Print the model's current metadata, remove every existing entry,
         add the new entries, and print the result.
      4. Save the model back to ``./kokoro.onnx``, overwriting the input.
    """
    model = onnx.load("./kokoro.onnx")

    # Only the shape of this tensor is used below (for "style_dim").
    # NOTE(review): presumably all voice files share this shape -- confirm.
    style = torch.load(
        "./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
    )

    # Comma-separated "name->id" and "id->name" pairs, in the iteration
    # order of speaker2id.
    speaker2id_str = ",".join(f"{s}->{i}" for s, i in speaker2id.items())
    id2speaker_str = ",".join(f"{i}->{s}" for s, i in speaker2id.items())

    meta_data = {
        "model_type": "kokoro",
        "language": "multi-lang, e.g., English, Chinese",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 2,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style.shape)),
        "n_speakers": len(speaker2id),
        "id2speaker": id2speaker_str,
        "speaker2id": speaker2id_str,
        "speaker_names": ",".join(map(str, speaker2id)),
        "model_url": "https://github.com/thewh1teagle/kokoro-onnx/releases/tag/model-files",
        "see_also": "https://huggingface.co/spaces/hexgrad/Kokoro-TTS",
        "see_also_2": "https://huggingface.co/hexgrad/Kokoro-82M",
        "maintainer": "k2-fsa",
        "comment": "This is Kokoro v1.0, a multilingual TTS model, supporting English, Chinese, French, Japanese etc.",
    }

    # Show the metadata as it was before the rewrite.
    print(model.metadata_props)

    # Remove every existing entry so that only the entries above remain.
    del model.metadata_props[:]

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        # Integer values (e.g. sample_rate) are stored as their string form.
        meta.value = str(value)

    print("--------------------")
    print(model.metadata_props)

    onnx.save(model, "./kokoro.onnx")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the metadata rewrite only when executed as a script, not on import.
    main()
|