From 6cabaa11bf0365e8813f8fc55aefaea3d0215d94 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 20 Apr 2025 14:35:02 +0800 Subject: [PATCH] Export kokoro 1.0 int8 models (#2137) --- .github/workflows/export-kokoro.yaml | 128 ++++++++++++++++---- scripts/kokoro/v1.0/add_meta_data.py | 4 +- scripts/kokoro/v1.0/dynamic_quantization.py | 42 +++++++ scripts/kokoro/v1.0/export_onnx.py | 53 ++++++++ scripts/kokoro/v1.0/generate_tokens.py | 2 +- scripts/kokoro/v1.0/generate_voices_bin.py | 2 +- scripts/kokoro/v1.0/run.sh | 80 ++---------- 7 files changed, 212 insertions(+), 99 deletions(-) create mode 100755 scripts/kokoro/v1.0/dynamic_quantization.py create mode 100755 scripts/kokoro/v1.0/export_onnx.py diff --git a/.github/workflows/export-kokoro.yaml b/.github/workflows/export-kokoro.yaml index 69b77803..360a14b7 100644 --- a/.github/workflows/export-kokoro.yaml +++ b/.github/workflows/export-kokoro.yaml @@ -3,7 +3,7 @@ name: export-kokoro-to-onnx on: push: branches: - - export-kokoro-2 + - fix-export-kokoro-1.0-2 workflow_dispatch: @@ -111,6 +111,26 @@ jobs: ls -lh $d.tar.bz2 + d=kokoro-int8-multi-lang-v1_0 + mkdir $d + cp -v LICENSE $d/LICENSE + cp -a espeak-ng-data $d/ + cp -v $src/kokoro.int8.onnx $d/model.int8.onnx + cp -v $src/voices.bin $d/ + cp -v $src/tokens.txt $d/ + cp -v $src/lexicon*.txt $d/ + cp -v $src/README.md $d/README.md + cp -av dict $d/ + cp -v ./*.fst $d/ + ls -lh $d/ + echo "---" + ls -lh $d/dict + + tar cjfv $d.tar.bz2 $d + rm -rf $d + + ls -lh $d.tar.bz2 + - name: Collect results 1.1-zh if: matrix.version == '1.1-zh' shell: bash @@ -166,6 +186,25 @@ jobs: echo "---" ls -lh *.tar.bz2 + - name: Release + if: github.repository_owner == 'csukuangfj' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: tts-models + + - name: Release + if: github.repository_owner == 'k2-fsa' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + tag: tts-models - name: Publish to huggingface 0.19 if: matrix.version == '0.19' @@ -216,7 +255,7 @@ jobs: git commit -m "add models" git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true - - name: Publish to huggingface 1.0 + - name: Publish to huggingface 1.0 float32 if: matrix.version == '1.0' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -267,6 +306,69 @@ jobs: git commit -m "add models" git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true + - name: Publish to huggingface 1.0 int8 + if: matrix.version == '1.0' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface + cd huggingface + rm -rf ./* + git fetch + git pull + + git lfs track "cmn_dict" + git lfs track "ru_dict" + git lfs track "af_dict" + git lfs track "ar_dict" + git lfs track "da_dict" + git lfs track "en_dict" + git lfs track "fa_dict" + git lfs track "hu_dict" + git lfs track "ia_dict" + git lfs track "it_dict" + git lfs track "lb_dict" + git lfs track "phondata" + git lfs track "ta_dict" + git lfs track "ur_dict" + git lfs track "yue_dict" + git lfs track "*.wav" + git lfs track "lexicon*.txt" + + cp -a ../espeak-ng-data ./ + + cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx + + cp -v ../scripts/kokoro/v1.0/tokens.txt . + cp -v ../scripts/kokoro/v1.0/voices.bin . + cp -v ../scripts/kokoro/v1.0/lexicon*.txt . + cp -v ../scripts/kokoro/v1.0/README.md ./README.md + cp -v ../LICENSE ./ + cp -av ../dict ./ + cp -v ../*.fst ./ + + git lfs track "*.onnx" + git add . + + ls -lh + + git status + + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true + - name: Publish to huggingface 1.1-zh if: matrix.version == '1.1-zh' env: @@ -299,7 +401,6 @@ jobs: cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx - cp -v ../scripts/kokoro/v1.1-zh/tokens.txt . cp -v ../scripts/kokoro/v1.1-zh/voices.bin . cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt . @@ -350,7 +451,6 @@ jobs: cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx - cp -v ../scripts/kokoro/v1.1-zh/tokens.txt . cp -v ../scripts/kokoro/v1.1-zh/voices.bin . cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt . @@ -368,23 +468,3 @@ jobs: git commit -m "add models" git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true - - - name: Release - if: github.repository_owner == 'csukuangfj' - uses: svenstaro/upload-release-action@v2 - with: - file_glob: true - file: ./*.tar.bz2 - overwrite: true - repo_name: k2-fsa/sherpa-onnx - repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} - tag: tts-models - - - name: Release - if: github.repository_owner == 'k2-fsa' - uses: svenstaro/upload-release-action@v2 - with: - file_glob: true - file: ./*.tar.bz2 - overwrite: true - tag: tts-models diff --git a/scripts/kokoro/v1.0/add_meta_data.py b/scripts/kokoro/v1.0/add_meta_data.py index 102dce45..14a772e8 100755 --- a/scripts/kokoro/v1.0/add_meta_data.py +++ b/scripts/kokoro/v1.0/add_meta_data.py @@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id def main(): model = onnx.load("./kokoro.onnx") - style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu") + style = torch.load( + "./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu" + ) id2speaker_str = "" speaker2id_str = "" diff --git a/scripts/kokoro/v1.0/dynamic_quantization.py b/scripts/kokoro/v1.0/dynamic_quantization.py new file mode 100755 index 00000000..1e4c1e53 --- /dev/null +++ b/scripts/kokoro/v1.0/dynamic_quantization.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +import argparse + +import onnxruntime +from onnxruntime.quantization import QuantType, quantize_dynamic + + +def show(filename): + session_opts = onnxruntime.SessionOptions() + session_opts.log_severity_level = 3 + sess = onnxruntime.InferenceSession(filename, session_opts) + for i in sess.get_inputs(): + print(i) + + print("-----") + + for i in sess.get_outputs(): + print(i) + + +""" +NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length']) +NodeArg(name='style', type='tensor(float)', shape=[1, 256]) +NodeArg(name='speed', type='tensor(float)', shape=[1]) +----- +NodeArg(name='audio', type='tensor(float)', shape=['audio_length']) +""" + + +def main(): + show("./kokoro.onnx") + + quantize_dynamic( + model_input="kokoro.onnx", + model_output="kokoro.int8.onnx", + # op_types_to_quantize=["MatMul"], + weight_type=QuantType.QUInt8, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/kokoro/v1.0/export_onnx.py b/scripts/kokoro/v1.0/export_onnx.py new file mode 100755 index 00000000..2dadc2d5 --- /dev/null +++ b/scripts/kokoro/v1.0/export_onnx.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +import json + +import torch +from kokoro import KModel +from kokoro.model import KModelForONNX + + +@torch.no_grad() +def main(): + with open("Kokoro-82M/config.json") as f: + config = json.load(f) + + model = ( + KModel( + repo_id="not-used-any-value-is-ok", + model="Kokoro-82M/kokoro-v1_0.pth", + config=config, + disable_complex=True, + ) + .to("cpu") + .eval() + ) + + x = torch.randint(1, 100, (48,)).numpy() + x = torch.LongTensor([[0, *x, 0]]) + + style = torch.rand(1, 256, dtype=torch.float32) + speed = torch.rand(1) + + print(x.shape, x.dtype) + print(style.shape, style.dtype) + print(speed, speed.dtype) + + model2 = KModelForONNX(model) + + torch.onnx.export( + model2, + (x, style, speed), + "kokoro.onnx", + input_names=["tokens", "style", "speed"], + output_names=["audio"], + dynamic_axes={ + "tokens": {1: "sequence_length"}, + "audio": {0: "audio_length"}, + }, + opset_version=14, # minimum working version for this kokoro model is 14 + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/kokoro/v1.0/generate_tokens.py b/scripts/kokoro/v1.0/generate_tokens.py index 5c93ef5d..3f899cd9 100755 --- a/scripts/kokoro/v1.0/generate_tokens.py +++ b/scripts/kokoro/v1.0/generate_tokens.py @@ -6,7 +6,7 @@ import json def main(): - with open("config.json") as f: + with open("Kokoro-82M/config.json") as f: config = json.load(f) vocab = config["vocab"] diff --git a/scripts/kokoro/v1.0/generate_voices_bin.py b/scripts/kokoro/v1.0/generate_voices_bin.py index c89ce243..c0346d62 100755 --- a/scripts/kokoro/v1.0/generate_voices_bin.py +++ b/scripts/kokoro/v1.0/generate_voices_bin.py @@ -71,7 +71,7 @@ def main(): with open("voices.bin", "wb") as f: for _, speaker in id2speaker.items(): m = torch.load( - f"voices/{speaker}.pt", + f"Kokoro-82M/voices/{speaker}.pt", weights_only=True, map_location="cpu", ).numpy() diff --git a/scripts/kokoro/v1.0/run.sh b/scripts/kokoro/v1.0/run.sh index de8048c7..19bdff81 100755 --- a/scripts/kokoro/v1.0/run.sh +++ b/scripts/kokoro/v1.0/run.sh @@ -3,93 +3,29 @@ set -ex -if [ ! -f kokoro.onnx ]; then - # see https://github.com/taylorchu/kokoro-onnx/releases - curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx -fi +git clone https://huggingface.co/hexgrad/Kokoro-82M -if [ ! -f config.json ]; then - # see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json - curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json -fi - -# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83 -# and # https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices # # af -> American female # am -> American male # bf -> British female # bm -> British male -voices=( -af_alloy -af_aoede -af_bella -af_heart -af_jessica -af_kore -af_nicole -af_nova -af_river -af_sarah -af_sky -am_adam -am_echo -am_eric -am_fenrir -am_liam -am_michael -am_onyx -am_puck -am_santa -bf_alice -bf_emma -bf_isabella -bf_lily -bm_daniel -bm_fable -bm_george -bm_lewis -ef_dora -em_alex -ff_siwis -hf_alpha -hf_beta -hm_omega -hm_psi -if_sara -im_nicola -jf_alpha -jf_gongitsune -jf_nezumi -jf_tebukuro -jm_kumo -pf_dora -pm_alex -pm_santa -zf_xiaobei # 东北话 -zf_xiaoni -zf_xiaoxiao -zf_xiaoyi -zm_yunjian -zm_yunxi -zm_yunxia -zm_yunyang -) -mkdir -p voices +if [ ! -f ./kokoro.onnx ]; then + python3 ./export_onnx.py +fi -for v in ${voices[@]}; do - if [ ! -f voices/$v.pt ]; then - curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt - fi -done if [ ! -f ./.add-meta-data.done ]; then python3 ./add_meta_data.py touch ./.add-meta-data.done fi +if [ ! -f ./kokoro.int8.onnx ]; then + python3 ./dynamic_quantization.py +fi + if [ ! -f us_gold.json ]; then curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json fi