Add int8 quantized whisper large models (#1126)
This commit is contained in:
52
.github/workflows/export-whisper-to-onnx.yaml
vendored
52
.github/workflows/export-whisper-to-onnx.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [macos-latest]
|
os: [macos-latest]
|
||||||
model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "distil-large-v2"]
|
model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
|
||||||
# model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
|
# model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
|
||||||
python-version: ["3.8"]
|
python-version: ["3.8"]
|
||||||
|
|
||||||
@@ -56,11 +56,7 @@ jobs:
|
|||||||
python3 ./export-onnx.py --model ${{ matrix.model }}
|
python3 ./export-onnx.py --model ${{ matrix.model }}
|
||||||
# python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./
|
# python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./
|
||||||
#
|
#
|
||||||
if [[ $model == medium-aishell ]]; then
|
|
||||||
ls -lh *.onnx
|
|
||||||
rm -fv medium-aishell-encoder.onnx
|
|
||||||
rm -fv medium-aishell-decoder.onnx
|
|
||||||
fi
|
|
||||||
|
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
@@ -97,16 +93,34 @@ jobs:
|
|||||||
ls -lh $src
|
ls -lh $src
|
||||||
echo "--------------------"
|
echo "--------------------"
|
||||||
|
|
||||||
if [[ $model == large || $model == large-v1 || $model == large-v2 || $model == distil-large-v2 ]]; then
|
if [[ $model == medium-aishell ]]; then
|
||||||
echo "Don't release model to github for large models. $model"
|
ls -lh *.onnx # the float32 onnx model for medium-aishell is too large to be uploaded to GitHub
|
||||||
|
mkdir -p bak
|
||||||
|
mv -v $src/$model-encoder.onnx ./bak
|
||||||
|
mv -v $src/$model-decoder.onnx ./bak
|
||||||
|
ls -lh $src
|
||||||
|
|
||||||
|
tar cvjf $src.tar.bz2 $src
|
||||||
|
mv -v ./bak/* $src/
|
||||||
|
rm -rf bak
|
||||||
|
elif [[ -f $src/$model-encoder.weights ]]; then
|
||||||
|
# we only publish int8 models to GitHub for large Whisper models
|
||||||
|
mkdir -p bak
|
||||||
|
mv -v $src/*weights ./bak
|
||||||
|
mv -v $src/$model-encoder.onnx ./bak
|
||||||
|
mv -v $src/$model-decoder.onnx ./bak
|
||||||
|
ls -lh $src
|
||||||
|
|
||||||
|
tar cvjf $src.tar.bz2 $src
|
||||||
|
mv -v ./bak/* $src/
|
||||||
|
rm -rf bak
|
||||||
else
|
else
|
||||||
tar cvjf $src.tar.bz2 $src
|
tar cvjf $src.tar.bz2 $src
|
||||||
fi
|
fi
|
||||||
|
|
||||||
ls -lh
|
ls -lh *.tar.bz2
|
||||||
|
|
||||||
- name: Release
|
- name: Release
|
||||||
if: matrix.model != 'large' && matrix.model != 'large-v1' && matrix.model != 'large-v2' && matrix.model != 'large-v3' && matrix.model != 'distil-large-v2'
|
|
||||||
uses: svenstaro/upload-release-action@v2
|
uses: svenstaro/upload-release-action@v2
|
||||||
with:
|
with:
|
||||||
file_glob: true
|
file_glob: true
|
||||||
@@ -132,9 +146,7 @@ jobs:
|
|||||||
|
|
||||||
git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
|
git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
|
||||||
|
|
||||||
if [[ $model != medium-aishell ]]; then
|
rm -rf huggingface/*
|
||||||
rm -rf huggingface/*
|
|
||||||
fi
|
|
||||||
|
|
||||||
cp -av $src/* ./huggingface/
|
cp -av $src/* ./huggingface/
|
||||||
|
|
||||||
@@ -149,11 +161,10 @@ jobs:
|
|||||||
git commit -m "upload ${{ matrix.model }}"
|
git commit -m "upload ${{ matrix.model }}"
|
||||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} main
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} main
|
||||||
|
|
||||||
- name: Test ${{ matrix.model }}
|
- name: Test float32 ${{ matrix.model }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
python3 -m pip install kaldi-native-fbank
|
python3 -m pip install kaldi-native-fbank
|
||||||
git checkout .
|
|
||||||
model=${{ matrix.model }}
|
model=${{ matrix.model }}
|
||||||
src=sherpa-onnx-whisper-$model
|
src=sherpa-onnx-whisper-$model
|
||||||
time python3 scripts/whisper/test.py \
|
time python3 scripts/whisper/test.py \
|
||||||
@@ -161,3 +172,14 @@ jobs:
|
|||||||
--decoder $src/$model-decoder.onnx \
|
--decoder $src/$model-decoder.onnx \
|
||||||
--tokens $src/$model-tokens.txt \
|
--tokens $src/$model-tokens.txt \
|
||||||
$src/test_wavs/0.wav
|
$src/test_wavs/0.wav
|
||||||
|
|
||||||
|
- name: Test int8 ${{ matrix.model }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
model=${{ matrix.model }}
|
||||||
|
src=sherpa-onnx-whisper-$model
|
||||||
|
time python3 scripts/whisper/test.py \
|
||||||
|
--encoder $src/$model-encoder.int8.onnx \
|
||||||
|
--decoder $src/$model-decoder.int8.onnx \
|
||||||
|
--tokens $src/$model-tokens.txt \
|
||||||
|
$src/test_wavs/0.wav
|
||||||
|
|||||||
@@ -582,9 +582,6 @@ def main():
|
|||||||
location=decoder_external_filename + ".weights",
|
location=decoder_external_filename + ".weights",
|
||||||
)
|
)
|
||||||
|
|
||||||
if "large" in args.model:
|
|
||||||
# it causes errors for large models, so skip it.
|
|
||||||
return
|
|
||||||
# Generate int8 quantization models
|
# Generate int8 quantization models
|
||||||
# See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
|
# See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
|
||||||
|
|
||||||
|
|||||||
@@ -90,6 +90,7 @@ class OnnxModel:
|
|||||||
self.n_text_layer = int(meta["n_text_layer"])
|
self.n_text_layer = int(meta["n_text_layer"])
|
||||||
self.n_text_ctx = int(meta["n_text_ctx"])
|
self.n_text_ctx = int(meta["n_text_ctx"])
|
||||||
self.n_text_state = int(meta["n_text_state"])
|
self.n_text_state = int(meta["n_text_state"])
|
||||||
|
self.n_mels = int(meta["n_mels"])
|
||||||
self.sot = int(meta["sot"])
|
self.sot = int(meta["sot"])
|
||||||
self.eot = int(meta["eot"])
|
self.eot = int(meta["eot"])
|
||||||
self.translate = int(meta["translate"])
|
self.translate = int(meta["translate"])
|
||||||
@@ -294,8 +295,9 @@ def main():
|
|||||||
args = get_args()
|
args = get_args()
|
||||||
|
|
||||||
model = OnnxModel(args.encoder, args.decoder)
|
model = OnnxModel(args.encoder, args.decoder)
|
||||||
dim = 80 if "large-v3" not in args.encoder else 128
|
n_mels = model.n_mels
|
||||||
mel = compute_features(args.sound_file, dim=dim)
|
|
||||||
|
mel = compute_features(args.sound_file, dim=n_mels)
|
||||||
|
|
||||||
n_layer_cross_k, n_layer_cross_v = model.run_encoder(mel)
|
n_layer_cross_k, n_layer_cross_v = model.run_encoder(mel)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user