Export kokoro 1.0 int8 models (#2137)
This commit is contained in:
128
.github/workflows/export-kokoro.yaml
vendored
128
.github/workflows/export-kokoro.yaml
vendored
@@ -3,7 +3,7 @@ name: export-kokoro-to-onnx
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- export-kokoro-2
|
- fix-export-kokoro-1.0-2
|
||||||
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
@@ -111,6 +111,26 @@ jobs:
|
|||||||
|
|
||||||
ls -lh $d.tar.bz2
|
ls -lh $d.tar.bz2
|
||||||
|
|
||||||
|
d=kokoro-int8-multi-lang-v1_0
|
||||||
|
mkdir $d
|
||||||
|
cp -v LICENSE $d/LICENSE
|
||||||
|
cp -a espeak-ng-data $d/
|
||||||
|
cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
|
||||||
|
cp -v $src/voices.bin $d/
|
||||||
|
cp -v $src/tokens.txt $d/
|
||||||
|
cp -v $src/lexicon*.txt $d/
|
||||||
|
cp -v $src/README.md $d/README.md
|
||||||
|
cp -av dict $d/
|
||||||
|
cp -v ./*.fst $d/
|
||||||
|
ls -lh $d/
|
||||||
|
echo "---"
|
||||||
|
ls -lh $d/dict
|
||||||
|
|
||||||
|
tar cjfv $d.tar.bz2 $d
|
||||||
|
rm -rf $d
|
||||||
|
|
||||||
|
ls -lh $d.tar.bz2
|
||||||
|
|
||||||
- name: Collect results 1.1-zh
|
- name: Collect results 1.1-zh
|
||||||
if: matrix.version == '1.1-zh'
|
if: matrix.version == '1.1-zh'
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -166,6 +186,25 @@ jobs:
|
|||||||
echo "---"
|
echo "---"
|
||||||
ls -lh *.tar.bz2
|
ls -lh *.tar.bz2
|
||||||
|
|
||||||
|
- name: Release
|
||||||
|
if: github.repository_owner == 'csukuangfj'
|
||||||
|
uses: svenstaro/upload-release-action@v2
|
||||||
|
with:
|
||||||
|
file_glob: true
|
||||||
|
file: ./*.tar.bz2
|
||||||
|
overwrite: true
|
||||||
|
repo_name: k2-fsa/sherpa-onnx
|
||||||
|
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||||
|
tag: tts-models
|
||||||
|
|
||||||
|
- name: Release
|
||||||
|
if: github.repository_owner == 'k2-fsa'
|
||||||
|
uses: svenstaro/upload-release-action@v2
|
||||||
|
with:
|
||||||
|
file_glob: true
|
||||||
|
file: ./*.tar.bz2
|
||||||
|
overwrite: true
|
||||||
|
tag: tts-models
|
||||||
|
|
||||||
- name: Publish to huggingface 0.19
|
- name: Publish to huggingface 0.19
|
||||||
if: matrix.version == '0.19'
|
if: matrix.version == '0.19'
|
||||||
@@ -216,7 +255,7 @@ jobs:
|
|||||||
git commit -m "add models"
|
git commit -m "add models"
|
||||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
|
||||||
|
|
||||||
- name: Publish to huggingface 1.0
|
- name: Publish to huggingface 1.0 float32
|
||||||
if: matrix.version == '1.0'
|
if: matrix.version == '1.0'
|
||||||
env:
|
env:
|
||||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
@@ -267,6 +306,69 @@ jobs:
|
|||||||
git commit -m "add models"
|
git commit -m "add models"
|
||||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
|
||||||
|
|
||||||
|
- name: Publish to huggingface 1.0 int8
|
||||||
|
if: matrix.version == '1.0'
|
||||||
|
env:
|
||||||
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
|
uses: nick-fields/retry@v3
|
||||||
|
with:
|
||||||
|
max_attempts: 20
|
||||||
|
timeout_seconds: 200
|
||||||
|
shell: bash
|
||||||
|
command: |
|
||||||
|
git config --global user.email "csukuangfj@gmail.com"
|
||||||
|
git config --global user.name "Fangjun Kuang"
|
||||||
|
|
||||||
|
rm -rf huggingface
|
||||||
|
export GIT_LFS_SKIP_SMUDGE=1
|
||||||
|
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||||
|
|
||||||
|
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface
|
||||||
|
cd huggingface
|
||||||
|
rm -rf ./*
|
||||||
|
git fetch
|
||||||
|
git pull
|
||||||
|
|
||||||
|
git lfs track "cmn_dict"
|
||||||
|
git lfs track "ru_dict"
|
||||||
|
git lfs track "af_dict"
|
||||||
|
git lfs track "ar_dict"
|
||||||
|
git lfs track "da_dict"
|
||||||
|
git lfs track "en_dict"
|
||||||
|
git lfs track "fa_dict"
|
||||||
|
git lfs track "hu_dict"
|
||||||
|
git lfs track "ia_dict"
|
||||||
|
git lfs track "it_dict"
|
||||||
|
git lfs track "lb_dict"
|
||||||
|
git lfs track "phondata"
|
||||||
|
git lfs track "ta_dict"
|
||||||
|
git lfs track "ur_dict"
|
||||||
|
git lfs track "yue_dict"
|
||||||
|
git lfs track "*.wav"
|
||||||
|
git lfs track "lexicon*.txt"
|
||||||
|
|
||||||
|
cp -a ../espeak-ng-data ./
|
||||||
|
|
||||||
|
cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx
|
||||||
|
|
||||||
|
cp -v ../scripts/kokoro/v1.0/tokens.txt .
|
||||||
|
cp -v ../scripts/kokoro/v1.0/voices.bin .
|
||||||
|
cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
|
||||||
|
cp -v ../scripts/kokoro/v1.0/README.md ./README.md
|
||||||
|
cp -v ../LICENSE ./
|
||||||
|
cp -av ../dict ./
|
||||||
|
cp -v ../*.fst ./
|
||||||
|
|
||||||
|
git lfs track "*.onnx"
|
||||||
|
git add .
|
||||||
|
|
||||||
|
ls -lh
|
||||||
|
|
||||||
|
git status
|
||||||
|
|
||||||
|
git commit -m "add models"
|
||||||
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true
|
||||||
|
|
||||||
- name: Publish to huggingface 1.1-zh
|
- name: Publish to huggingface 1.1-zh
|
||||||
if: matrix.version == '1.1-zh'
|
if: matrix.version == '1.1-zh'
|
||||||
env:
|
env:
|
||||||
@@ -299,7 +401,6 @@ jobs:
|
|||||||
|
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx
|
cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx
|
||||||
|
|
||||||
|
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
|
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
|
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
|
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
|
||||||
@@ -350,7 +451,6 @@ jobs:
|
|||||||
|
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx
|
cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx
|
||||||
|
|
||||||
|
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
|
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
|
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
|
||||||
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
|
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
|
||||||
@@ -368,23 +468,3 @@ jobs:
|
|||||||
|
|
||||||
git commit -m "add models"
|
git commit -m "add models"
|
||||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
|
||||||
|
|
||||||
- name: Release
|
|
||||||
if: github.repository_owner == 'csukuangfj'
|
|
||||||
uses: svenstaro/upload-release-action@v2
|
|
||||||
with:
|
|
||||||
file_glob: true
|
|
||||||
file: ./*.tar.bz2
|
|
||||||
overwrite: true
|
|
||||||
repo_name: k2-fsa/sherpa-onnx
|
|
||||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
|
||||||
tag: tts-models
|
|
||||||
|
|
||||||
- name: Release
|
|
||||||
if: github.repository_owner == 'k2-fsa'
|
|
||||||
uses: svenstaro/upload-release-action@v2
|
|
||||||
with:
|
|
||||||
file_glob: true
|
|
||||||
file: ./*.tar.bz2
|
|
||||||
overwrite: true
|
|
||||||
tag: tts-models
|
|
||||||
|
|||||||
@@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
model = onnx.load("./kokoro.onnx")
|
model = onnx.load("./kokoro.onnx")
|
||||||
style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu")
|
style = torch.load(
|
||||||
|
"./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
|
||||||
|
)
|
||||||
|
|
||||||
id2speaker_str = ""
|
id2speaker_str = ""
|
||||||
speaker2id_str = ""
|
speaker2id_str = ""
|
||||||
|
|||||||
42
scripts/kokoro/v1.0/dynamic_quantization.py
Executable file
42
scripts/kokoro/v1.0/dynamic_quantization.py
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import onnxruntime
|
||||||
|
from onnxruntime.quantization import QuantType, quantize_dynamic
|
||||||
|
|
||||||
|
|
||||||
|
def show(filename):
    """Print the input and output descriptors of an ONNX model.

    Args:
      filename: Path of the ONNX model file; it is passed unchanged to
        onnxruntime.InferenceSession.
    """
    session_opts = onnxruntime.SessionOptions()
    # Level 3 keeps onnxruntime's own logging to errors and above so that
    # only the NodeArg lines printed below show up.
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)

    for i in sess.get_inputs():
        print(i)

    # Separator between the input list and the output list.
    print("-----")

    for i in sess.get_outputs():
        print(i)
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
|
||||||
|
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
|
||||||
|
NodeArg(name='speed', type='tensor(float)', shape=[1])
|
||||||
|
-----
|
||||||
|
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Write a dynamically quantized copy of kokoro.onnx to kokoro.int8.onnx.

    The inputs and outputs of the float model are printed first via show().
    Both file names are relative to the current working directory.
    """
    show("./kokoro.onnx")

    quantize_dynamic(
        model_input="kokoro.onnx",
        model_output="kokoro.int8.onnx",
        # Left disabled: no op-type filter is passed, so quantize_dynamic
        # falls back to its default set of op types.
        # op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QUInt8,
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
53
scripts/kokoro/v1.0/export_onnx.py
Executable file
53
scripts/kokoro/v1.0/export_onnx.py
Executable file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from kokoro import KModel
|
||||||
|
from kokoro.model import KModelForONNX
|
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def main():
    """Export the Kokoro v1.0 checkpoint to ``kokoro.onnx``.

    Reads ``Kokoro-82M/config.json`` and ``Kokoro-82M/kokoro-v1_0.pth``
    (paths relative to the current working directory), builds the model on
    CPU in eval mode, wraps it in ``KModelForONNX`` and exports it using
    random dummy inputs.
    """
    with open("Kokoro-82M/config.json") as f:
        config = json.load(f)

    model = (
        KModel(
            repo_id="not-used-any-value-is-ok",
            model="Kokoro-82M/kokoro-v1_0.pth",
            config=config,
            disable_complex=True,
        )
        .to("cpu")
        .eval()
    )

    # Dummy token ids: 48 random values in [1, 100) with a 0 prepended and
    # appended, giving an int64 tensor of shape (1, 50).
    x = torch.randint(1, 100, (48,)).numpy()
    x = torch.LongTensor([[0, *x, 0]])

    # Dummy style vector and speed; only their shapes/dtypes matter here.
    style = torch.rand(1, 256, dtype=torch.float32)
    speed = torch.rand(1)

    print(x.shape, x.dtype)
    print(style.shape, style.dtype)
    print(speed, speed.dtype)

    model2 = KModelForONNX(model)

    torch.onnx.export(
        model2,
        (x, style, speed),
        "kokoro.onnx",
        input_names=["tokens", "style", "speed"],
        output_names=["audio"],
        # Only the token sequence length and the audio length are dynamic;
        # "style" and "speed" keep the fixed shapes of the dummy inputs.
        dynamic_axes={
            "tokens": {1: "sequence_length"},
            "audio": {0: "audio_length"},
        },
        opset_version=14,  # minimum working version for this kokoro model is 14
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -6,7 +6,7 @@ import json
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
with open("config.json") as f:
|
with open("Kokoro-82M/config.json") as f:
|
||||||
config = json.load(f)
|
config = json.load(f)
|
||||||
vocab = config["vocab"]
|
vocab = config["vocab"]
|
||||||
|
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ def main():
|
|||||||
with open("voices.bin", "wb") as f:
|
with open("voices.bin", "wb") as f:
|
||||||
for _, speaker in id2speaker.items():
|
for _, speaker in id2speaker.items():
|
||||||
m = torch.load(
|
m = torch.load(
|
||||||
f"voices/{speaker}.pt",
|
f"Kokoro-82M/voices/{speaker}.pt",
|
||||||
weights_only=True,
|
weights_only=True,
|
||||||
map_location="cpu",
|
map_location="cpu",
|
||||||
).numpy()
|
).numpy()
|
||||||
|
|||||||
@@ -3,93 +3,29 @@
|
|||||||
|
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
if [ ! -f kokoro.onnx ]; then
|
git clone https://huggingface.co/hexgrad/Kokoro-82M
|
||||||
# see https://github.com/taylorchu/kokoro-onnx/releases
|
|
||||||
curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f config.json ]; then
|
|
||||||
# see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json
|
|
||||||
curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json
|
|
||||||
fi
|
|
||||||
|
|
||||||
# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83
|
|
||||||
# and
|
|
||||||
# https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
|
# https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
|
||||||
#
|
#
|
||||||
# af -> American female
|
# af -> American female
|
||||||
# am -> American male
|
# am -> American male
|
||||||
# bf -> British female
|
# bf -> British female
|
||||||
# bm -> British male
|
# bm -> British male
|
||||||
voices=(
|
|
||||||
af_alloy
|
|
||||||
af_aoede
|
|
||||||
af_bella
|
|
||||||
af_heart
|
|
||||||
af_jessica
|
|
||||||
af_kore
|
|
||||||
af_nicole
|
|
||||||
af_nova
|
|
||||||
af_river
|
|
||||||
af_sarah
|
|
||||||
af_sky
|
|
||||||
am_adam
|
|
||||||
am_echo
|
|
||||||
am_eric
|
|
||||||
am_fenrir
|
|
||||||
am_liam
|
|
||||||
am_michael
|
|
||||||
am_onyx
|
|
||||||
am_puck
|
|
||||||
am_santa
|
|
||||||
bf_alice
|
|
||||||
bf_emma
|
|
||||||
bf_isabella
|
|
||||||
bf_lily
|
|
||||||
bm_daniel
|
|
||||||
bm_fable
|
|
||||||
bm_george
|
|
||||||
bm_lewis
|
|
||||||
ef_dora
|
|
||||||
em_alex
|
|
||||||
ff_siwis
|
|
||||||
hf_alpha
|
|
||||||
hf_beta
|
|
||||||
hm_omega
|
|
||||||
hm_psi
|
|
||||||
if_sara
|
|
||||||
im_nicola
|
|
||||||
jf_alpha
|
|
||||||
jf_gongitsune
|
|
||||||
jf_nezumi
|
|
||||||
jf_tebukuro
|
|
||||||
jm_kumo
|
|
||||||
pf_dora
|
|
||||||
pm_alex
|
|
||||||
pm_santa
|
|
||||||
zf_xiaobei # 东北话
|
|
||||||
zf_xiaoni
|
|
||||||
zf_xiaoxiao
|
|
||||||
zf_xiaoyi
|
|
||||||
zm_yunjian
|
|
||||||
zm_yunxi
|
|
||||||
zm_yunxia
|
|
||||||
zm_yunyang
|
|
||||||
)
|
|
||||||
|
|
||||||
mkdir -p voices
|
if [ ! -f ./kokoro.onnx ]; then
|
||||||
|
python3 ./export_onnx.py
|
||||||
|
fi
|
||||||
|
|
||||||
for v in ${voices[@]}; do
|
|
||||||
if [ ! -f voices/$v.pt ]; then
|
|
||||||
curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ ! -f ./.add-meta-data.done ]; then
|
if [ ! -f ./.add-meta-data.done ]; then
|
||||||
python3 ./add_meta_data.py
|
python3 ./add_meta_data.py
|
||||||
touch ./.add-meta-data.done
|
touch ./.add-meta-data.done
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./kokoro.int8.onnx ]; then
|
||||||
|
python3 ./dynamic_quantization.py
|
||||||
|
fi
|
||||||
|
|
||||||
if [ ! -f us_gold.json ]; then
|
if [ ! -f us_gold.json ]; then
|
||||||
curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
|
curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user