Remove the 30-second constraint from whisper. (#471)
This commit is contained in:
36
.github/scripts/test-offline-whisper.sh
vendored
36
.github/scripts/test-offline-whisper.sh
vendored
@@ -16,8 +16,12 @@ which $EXE
|
||||
names=(
|
||||
tiny.en
|
||||
base.en
|
||||
# small.en
|
||||
# medium.en
|
||||
small.en
|
||||
medium.en
|
||||
tiny
|
||||
base
|
||||
small
|
||||
medium
|
||||
)
|
||||
|
||||
for name in ${names[@]}; do
|
||||
@@ -33,8 +37,8 @@ for name in ${names[@]}; do
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||
pushd $repo
|
||||
git lfs pull --include "*.onnx"
|
||||
git lfs pull --include "*.ort"
|
||||
ls -lh *.{onnx,ort}
|
||||
# git lfs pull --include "*.ort"
|
||||
ls -lh *.onnx
|
||||
popd
|
||||
|
||||
log "test fp32 onnx"
|
||||
@@ -43,6 +47,7 @@ for name in ${names[@]}; do
|
||||
--tokens=$repo/${name}-tokens.txt \
|
||||
--whisper-encoder=$repo/${name}-encoder.onnx \
|
||||
--whisper-decoder=$repo/${name}-decoder.onnx \
|
||||
--whisper-tail-paddings=500 \
|
||||
--num-threads=2 \
|
||||
$repo/test_wavs/0.wav \
|
||||
$repo/test_wavs/1.wav \
|
||||
@@ -54,28 +59,7 @@ for name in ${names[@]}; do
|
||||
--tokens=$repo/${name}-tokens.txt \
|
||||
--whisper-encoder=$repo/${name}-encoder.int8.onnx \
|
||||
--whisper-decoder=$repo/${name}-decoder.int8.onnx \
|
||||
--num-threads=2 \
|
||||
$repo/test_wavs/0.wav \
|
||||
$repo/test_wavs/1.wav \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
log "test fp32 ort"
|
||||
|
||||
time $EXE \
|
||||
--tokens=$repo/${name}-tokens.txt \
|
||||
--whisper-encoder=$repo/${name}-encoder.ort \
|
||||
--whisper-decoder=$repo/${name}-decoder.ort \
|
||||
--num-threads=2 \
|
||||
$repo/test_wavs/0.wav \
|
||||
$repo/test_wavs/1.wav \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
log "test int8 ort"
|
||||
|
||||
time $EXE \
|
||||
--tokens=$repo/${name}-tokens.txt \
|
||||
--whisper-encoder=$repo/${name}-encoder.int8.ort \
|
||||
--whisper-decoder=$repo/${name}-decoder.int8.ort \
|
||||
--whisper-tail-paddings=500 \
|
||||
--num-threads=2 \
|
||||
$repo/test_wavs/0.wav \
|
||||
$repo/test_wavs/1.wav \
|
||||
|
||||
60
.github/workflows/export-whisper-to-onnx.yaml
vendored
60
.github/workflows/export-whisper-to-onnx.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest]
|
||||
os: [ubuntu-latest]
|
||||
model: ["distil-medium.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
||||
python-version: ["3.8"]
|
||||
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
ls -lh
|
||||
fi
|
||||
python3 ./export-onnx.py --model ${{ matrix.model }}
|
||||
python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./
|
||||
# python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./
|
||||
|
||||
ls -lh
|
||||
|
||||
@@ -52,41 +52,61 @@ jobs:
|
||||
ls -lh ~/.cache/whisper
|
||||
fi
|
||||
|
||||
src=sherpa-onnx-whisper-${{ matrix.model }}
|
||||
|
||||
mkdir $src
|
||||
cp *.onnx $src/
|
||||
cp *tokens.txt $src
|
||||
|
||||
cd $src
|
||||
mkdir -p test_wavs
|
||||
cd test_wavs
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt
|
||||
cd ../..
|
||||
mv $src ../..
|
||||
|
||||
cd ../..
|
||||
echo "--------------------"
|
||||
ls -lh
|
||||
ls -lh $src
|
||||
echo "--------------------"
|
||||
|
||||
tar cjvf ./$src.tar.bz2 $src
|
||||
|
||||
- name: Release
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
file_glob: true
|
||||
file: ./*.tar.bz2
|
||||
overwrite: true
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: asr-models
|
||||
|
||||
- name: Publish ${{ matrix.model }} to huggingface
|
||||
shell: bash
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
run: |
|
||||
model=${{ matrix.model }}
|
||||
|
||||
cd scripts/whisper
|
||||
src=sherpa-onnx-whisper-${{ matrix.model }}
|
||||
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
|
||||
rm -rf huggingface/*
|
||||
|
||||
cp *.onnx ./huggingface
|
||||
cp *.ort ./huggingface
|
||||
cp *tokens.txt ./huggingface
|
||||
cp -av $src/* ./huggingface/
|
||||
|
||||
cd huggingface
|
||||
|
||||
if [[ $model == distil-medium.en ]]; then
|
||||
mkdir test_wavs
|
||||
cd test_wavs
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav
|
||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt
|
||||
git add .
|
||||
cd ..
|
||||
fi
|
||||
|
||||
git status
|
||||
ls -lh
|
||||
git lfs track "*.onnx"
|
||||
git lfs track "*.ort"
|
||||
# git lfs track "*.ort"
|
||||
git add .
|
||||
git commit -m "upload ${{ matrix.model }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} main
|
||||
|
||||
20
.github/workflows/linux.yaml
vendored
20
.github/workflows/linux.yaml
vendored
@@ -107,6 +107,16 @@ jobs:
|
||||
name: release-static
|
||||
path: build/bin/*
|
||||
|
||||
- name: Test offline Whisper
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin:$PATH
|
||||
export EXE=sherpa-onnx-offline
|
||||
|
||||
readelf -d build/bin/sherpa-onnx-offline
|
||||
|
||||
.github/scripts/test-offline-whisper.sh
|
||||
|
||||
- name: Test online CTC
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -139,16 +149,6 @@ jobs:
|
||||
|
||||
.github/scripts/test-online-paraformer.sh
|
||||
|
||||
- name: Test offline Whisper
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin:$PATH
|
||||
export EXE=sherpa-onnx-offline
|
||||
|
||||
readelf -d build/bin/sherpa-onnx-offline
|
||||
|
||||
.github/scripts/test-offline-whisper.sh
|
||||
|
||||
- name: Test offline transducer
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
14
.github/workflows/windows-x86.yaml
vendored
14
.github/workflows/windows-x86.yaml
vendored
@@ -93,13 +93,13 @@ jobs:
|
||||
|
||||
.github/scripts/test-online-paraformer.sh
|
||||
|
||||
- name: Test offline Whisper for windows x86
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin/Release:$PATH
|
||||
export EXE=sherpa-onnx-offline.exe
|
||||
|
||||
.github/scripts/test-offline-whisper.sh
|
||||
# - name: Test offline Whisper for windows x86
|
||||
# shell: bash
|
||||
# run: |
|
||||
# export PATH=$PWD/build/bin/Release:$PATH
|
||||
# export EXE=sherpa-onnx-offline.exe
|
||||
#
|
||||
# .github/scripts/test-offline-whisper.sh
|
||||
|
||||
- name: Test offline CTC for windows x86
|
||||
shell: bash
|
||||
|
||||
Reference in New Issue
Block a user