Support whisper large/large-v1/large-v2/large-v3 and distil-large-v2 (#1114)
This commit is contained in:
63
.github/workflows/export-whisper-to-onnx.yaml
vendored
63
.github/workflows/export-whisper-to-onnx.yaml
vendored
@@ -15,9 +15,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
# model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "distil-large-v2"]
|
||||
model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell"]
|
||||
os: [macos-latest]
|
||||
model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "distil-large-v2"]
|
||||
# model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
|
||||
python-version: ["3.8"]
|
||||
|
||||
steps:
|
||||
@@ -32,7 +32,7 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install torch==1.13.0 torchaudio==0.13.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
python3 -m pip install openai-whisper==20230314 onnxruntime onnx
|
||||
python3 -m pip install openai-whisper==20231117 onnxruntime onnx soundfile librosa
|
||||
|
||||
- name: export ${{ matrix.model }}
|
||||
shell: bash
|
||||
@@ -62,7 +62,6 @@ jobs:
|
||||
rm -fv medium-aishell-decoder.onnx
|
||||
fi
|
||||
|
||||
|
||||
ls -lh
|
||||
|
||||
ls -lh ~/.cache/whisper || true
|
||||
@@ -74,7 +73,8 @@ jobs:
|
||||
src=sherpa-onnx-whisper-${{ matrix.model }}
|
||||
|
||||
cd ..
|
||||
mv whisper $src
|
||||
mkdir $src
|
||||
mv -v whisper/$model* $src/
|
||||
|
||||
echo "------------------------------"
|
||||
|
||||
@@ -97,19 +97,16 @@ jobs:
|
||||
ls -lh $src
|
||||
echo "--------------------"
|
||||
|
||||
if [[ $model == large || $model == large-v1 || $model == large-v2 || $model == distil-large-v2 ]]; then
|
||||
#tar cvjf - $src | split --bytes=1024MB - $src.tar.bz2.
|
||||
tar cvjf $src.tar.bz2 $src
|
||||
split -b 1G $src.tar.bz2 $src.tar.bz2.
|
||||
rm $src.tar.bz2
|
||||
# cat $src.tar.gz.* | tar xjf -
|
||||
if [[ $model == large || $model == large-v1 || $model == large-v2 || $model == distil-large-v2 ]]; then
|
||||
echo "Don't release model to github for large models. $model"
|
||||
else
|
||||
tar cvjf $src.tar.bz2 $src
|
||||
fi
|
||||
|
||||
ls -lh
|
||||
|
||||
|
||||
- name: Release
|
||||
if: matrix.model != 'large' && matrix.model != 'large-v1' && matrix.model != 'large-v2' && matrix.model != 'large-v3' && matrix.model != 'distil-large-v2'
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
file_glob: true
|
||||
@@ -119,19 +116,6 @@ jobs:
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: asr-models
|
||||
|
||||
- name: Test ${{ matrix.model }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install kaldi-native-fbank
|
||||
git checkout .
|
||||
model=${{ matrix.model }}
|
||||
src=sherpa-onnx-whisper-$model
|
||||
python3 scripts/whisper/test.py \
|
||||
--encoder $src/$model-encoder.int8.onnx \
|
||||
--decoder $src/$model-decoder.int8.onnx \
|
||||
--tokens $src/$model-tokens.txt \
|
||||
$src/test_wavs/0.wav
|
||||
|
||||
- name: Publish ${{ matrix.model }} to huggingface
|
||||
shell: bash
|
||||
env:
|
||||
@@ -144,27 +128,36 @@ jobs:
|
||||
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
|
||||
git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
|
||||
|
||||
if [[ $model != medium-aishell ]]; then
|
||||
rm -rf huggingface/*
|
||||
fi
|
||||
|
||||
if [[ $model == large || $model == large-v1 || $model == large-v2 || $model == distil-large-v2 ]]; then
|
||||
mv $src.tar* ./huggingface
|
||||
else
|
||||
cp -v $src/*.onnx ./huggingface
|
||||
cp -v $src/*tokens* ./huggingface
|
||||
cp -av $src/test_wavs ./huggingface
|
||||
fi
|
||||
cp -av $src/* ./huggingface/
|
||||
|
||||
cd huggingface
|
||||
|
||||
git status
|
||||
ls -lh
|
||||
git lfs track "*gz*"
|
||||
git lfs track "*onnx*"
|
||||
git lfs track "*weights*"
|
||||
|
||||
git add .
|
||||
git commit -m "upload ${{ matrix.model }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} main
|
||||
|
||||
- name: Test ${{ matrix.model }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install kaldi-native-fbank
|
||||
git checkout .
|
||||
model=${{ matrix.model }}
|
||||
src=sherpa-onnx-whisper-$model
|
||||
time python3 scripts/whisper/test.py \
|
||||
--encoder $src/$model-encoder.onnx \
|
||||
--decoder $src/$model-decoder.onnx \
|
||||
--tokens $src/$model-tokens.txt \
|
||||
$src/test_wavs/0.wav
|
||||
|
||||
Reference in New Issue
Block a user