Export kokoro 1.0 int8 models (#2137)

This commit is contained in:
Fangjun Kuang
2025-04-20 14:35:02 +08:00
committed by GitHub
parent be0f382a54
commit 6cabaa11bf
7 changed files with 212 additions and 99 deletions

View File

@@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id
def main():
model = onnx.load("./kokoro.onnx")
style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu")
style = torch.load(
"./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
)
id2speaker_str = ""
speaker2id_str = ""

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
import argparse
import onnxruntime
from onnxruntime.quantization import QuantType, quantize_dynamic
def show(filename):
    """Print the input and output descriptors of an ONNX model.

    Every input is printed on its own line, followed by a ``-----``
    separator line, followed by every output on its own line.

    Args:
      filename: Path of the ONNX model to open with onnxruntime.
    """

    def dump(node_args):
        # One descriptor per line, in the order the session reports them.
        for node_arg in node_args:
            print(node_arg)

    options = onnxruntime.SessionOptions()
    # Per the onnxruntime docs, severity 3 limits logging to errors and above.
    options.log_severity_level = 3
    session = onnxruntime.InferenceSession(filename, options)

    dump(session.get_inputs())
    print("-----")
    dump(session.get_outputs())
"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""
def main():
    """Show the float model's inputs/outputs, then write an int8 copy.

    Reads ``kokoro.onnx`` from the current directory and writes the
    dynamically quantized model to ``kokoro.int8.onnx``.
    """
    show("./kokoro.onnx")

    # NOTE: op_types_to_quantize is deliberately not passed, so the
    # quantizer's default set of op types is used (a ["MatMul"]-only
    # restriction was left disabled by the author).
    quantization_kwargs = dict(
        model_input="kokoro.onnx",
        model_output="kokoro.int8.onnx",
        weight_type=QuantType.QUInt8,
    )
    quantize_dynamic(**quantization_kwargs)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python3
import json
import torch
from kokoro import KModel
from kokoro.model import KModelForONNX
@torch.no_grad()
def main():
    """Export the Kokoro v1.0 PyTorch checkpoint to ``kokoro.onnx``.

    Reads the model configuration and weights from the ``Kokoro-82M``
    directory, wraps the model in ``KModelForONNX`` and exports it with
    randomly generated example inputs. The exported graph has the inputs
    ``tokens``, ``style`` and ``speed`` and the single output ``audio``.
    """
    # JSON is UTF-8 by specification; be explicit so that loading does not
    # depend on the platform's default locale encoding.
    with open("Kokoro-82M/config.json", encoding="utf-8") as f:
        config = json.load(f)

    model = KModel(
        repo_id="not-used-any-value-is-ok",
        model="Kokoro-82M/kokoro-v1_0.pth",
        config=config,
        disable_complex=True,
    )
    model = model.to("cpu").eval()

    # Example inputs for the export: 48 random token ids in [1, 100)
    # surrounded by a 0 on each side, a random style matrix and a random
    # speed value.
    token_ids = torch.randint(1, 100, (48,)).numpy()
    tokens = torch.LongTensor([[0, *token_ids, 0]])
    style = torch.rand(1, 256, dtype=torch.float32)
    speed = torch.rand(1)

    print(tokens.shape, tokens.dtype)
    print(style.shape, style.dtype)
    print(speed, speed.dtype)

    onnx_wrapper = KModelForONNX(model)

    torch.onnx.export(
        onnx_wrapper,
        (tokens, style, speed),
        "kokoro.onnx",
        input_names=["tokens", "style", "speed"],
        output_names=["audio"],
        # Only the token sequence length and the resulting audio length are
        # declared as dynamic; all other dimensions stay fixed.
        dynamic_axes={
            "tokens": {1: "sequence_length"},
            "audio": {0: "audio_length"},
        },
        opset_version=14,  # minimum working version for this kokoro model is 14
    )
if __name__ == "__main__":
main()

View File

@@ -6,7 +6,7 @@ import json
def main():
with open("config.json") as f:
with open("Kokoro-82M/config.json") as f:
config = json.load(f)
vocab = config["vocab"]

View File

@@ -71,7 +71,7 @@ def main():
with open("voices.bin", "wb") as f:
for _, speaker in id2speaker.items():
m = torch.load(
f"voices/{speaker}.pt",
f"Kokoro-82M/voices/{speaker}.pt",
weights_only=True,
map_location="cpu",
).numpy()

View File

@@ -3,93 +3,29 @@
set -ex
if [ ! -f kokoro.onnx ]; then
# see https://github.com/taylorchu/kokoro-onnx/releases
curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx
fi
# Clone the upstream repository only once: with `set -e` active, running
# `git clone` again into the already existing directory would abort the script.
if [ ! -d Kokoro-82M ]; then
  git clone https://huggingface.co/hexgrad/Kokoro-82M
fi
if [ ! -f config.json ]; then
# see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json
curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json
fi
# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83
# and
# https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
#
# af -> American female
# am -> American male
# bf -> British female
# bm -> British male
voices=(
af_alloy
af_aoede
af_bella
af_heart
af_jessica
af_kore
af_nicole
af_nova
af_river
af_sarah
af_sky
am_adam
am_echo
am_eric
am_fenrir
am_liam
am_michael
am_onyx
am_puck
am_santa
bf_alice
bf_emma
bf_isabella
bf_lily
bm_daniel
bm_fable
bm_george
bm_lewis
ef_dora
em_alex
ff_siwis
hf_alpha
hf_beta
hm_omega
hm_psi
if_sara
im_nicola
jf_alpha
jf_gongitsune
jf_nezumi
jf_tebukuro
jm_kumo
pf_dora
pm_alex
pm_santa
zf_xiaobei # 东北话
zf_xiaoni
zf_xiaoxiao
zf_xiaoyi
zm_yunjian
zm_yunxi
zm_yunxia
zm_yunyang
)
mkdir -p voices
if [ ! -f ./kokoro.onnx ]; then
python3 ./export_onnx.py
fi
for v in ${voices[@]}; do
if [ ! -f voices/$v.pt ]; then
curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt
fi
done
if [ ! -f ./.add-meta-data.done ]; then
python3 ./add_meta_data.py
touch ./.add-meta-data.done
fi
if [ ! -f ./kokoro.int8.onnx ]; then
python3 ./dynamic_quantization.py
fi
if [ ! -f us_gold.json ]; then
curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
fi