Support spoken language identification with whisper (#694)

This commit is contained in:
Fangjun Kuang
2024-03-24 22:57:00 +08:00
committed by GitHub
parent 3cdad9b5d1
commit 0d258dd150
36 changed files with 1173 additions and 200 deletions

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger adapted from espnet.
  # Prefixes the message with "<caller file>:<caller line>:<caller function>"
  # taken from bash's call-stack arrays, then prints all arguments joined by
  # spaces. echo -e is kept so backslash escapes in the message are expanded.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
}
# Show which binary is under test and the search path it will be resolved on.
echo "EXE is $EXE"
echo "PATH: $PATH"
# Resolve EXE with the `command -v` builtin rather than the external `which`.
# It prints the resolved path and returns non-zero when the executable is
# missing, which aborts the script under `set -e`.
command -v "$EXE"
# Whisper model sizes to test. Each entry is appended to
# "sherpa-onnx-whisper-" to form the model repository name and is also the
# prefix of the encoder/decoder file names.
names=(tiny base small medium)
# all_language_codes=bo,ml,tt,fa,sl,bg,sn,sr,tl,km,ln,mr,hr,eu,ro,ba,bs,pl,as,nn,sk,ko,oc,ar,uz,pa,tg,mk,kk,hi,ha,uk,is,de,el,ja,yo,be,so,tk,id,sa,ru,yi,en,am,cs,ne,la,sv,su,pt,mi,ca,sd,hy,haw,fi,et,kn,da,lt,it,nl,he,mg,ur,tr,af,br,bn,ta,no,my,si,mt,th,gl,sw,mn,jw,ms,ps,fo,ka,hu,zh,ht,az,fr,lo,sq,gu,cy,lv,es,lb,te,vi
log "Download test waves"
# Test recordings, one per language, named "<code>-<language>.wav".
# NOTE(review): "po-polish.wav" uses "po", while the language-code list in the
# comment above contains "pl" -- confirm the remote file name before changing it.
waves=(
  ar-arabic.wav bg-bulgarian.wav cs-czech.wav da-danish.wav de-german.wav
  el-greek.wav en-english.wav es-spanish.wav fa-persian.wav fi-finnish.wav
  fr-french.wav hi-hindi.wav hr-croatian.wav id-indonesian.wav it-italian.wav
  ja-japanese.wav ko-korean.wav nl-dutch.wav no-norwegian.wav po-polish.wav
  pt-portuguese.wav ro-romanian.wav ru-russian.wav sk-slovak.wav sv-swedish.wav
  ta-tamil.wav tl-tagalog.wav tr-turkish.wav uk-ukrainian.wav zh-chinese.wav
)
# Fetch every test wave into the current directory.
# --fail makes curl exit non-zero on an HTTP error (e.g. 404) instead of saving
# the server's error page under the .wav name, so a bad download stops the
# script here (set -e) rather than surfacing later as a decoding failure.
# --retry gives transient network errors a few more attempts.
for wav in "${waves[@]}"; do
  echo "Downloading $wav"
  curl -fSL --retry 3 -O \
    "https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/$wav"
  # List just the file that was fetched to confirm it exists and show its size.
  ls -lh -- "$wav"
done
# For every model size: clone the model repository (fetching only the ONNX
# files through git-lfs), run $EXE on every test wave with the fp32 and then
# the int8 encoder/decoder pair, and remove the clone before the next size.
for name in "${names[@]}"; do
  log "------------------------------------------------------------"
  log "Run $name"
  log "------------------------------------------------------------"

  repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-whisper-$name
  log "Start testing ${repo_url}"
  repo=$(basename "$repo_url")
  log "Download pretrained model and test-data from $repo_url"

  # Clone without downloading LFS objects, then pull only the *.onnx files.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  pushd "$repo"
  git lfs pull --include "*.onnx"
  # git lfs pull --include "*.ort"
  ls -lh *.onnx
  popd

  for wav in "${waves[@]}"; do
    log "test fp32 onnx"
    time "$EXE" \
      --whisper-encoder="$repo/${name}-encoder.onnx" \
      --whisper-decoder="$repo/${name}-decoder.onnx" \
      "$wav"

    log "test int8 onnx"
    time "$EXE" \
      --whisper-encoder="$repo/${name}-encoder.int8.onnx" \
      --whisper-decoder="$repo/${name}-decoder.int8.onnx" \
      "$wav"
  done

  # ${repo:?} aborts instead of expanding to an empty string, so rm -rf can
  # never run against an empty path; -- stops option parsing.
  rm -rf -- "${repo:?}"
done

View File

@@ -82,7 +82,6 @@ jobs:
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
shell: bash
with:
max_attempts: 20
timeout_seconds: 200

View File

@@ -21,27 +21,12 @@ jobs:
fail-fast: false
matrix:
os: [macos-latest]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
steps:
- uses: actions/checkout@v4
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
if: matrix.python-version == 'cp37'
uses: pypa/cibuildwheel@v2.11.4
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'"
CIBW_ARCHS: "arm64"
CIBW_BUILD_VERBOSITY: 3
# Don't repair macOS wheels
CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""
- name: Build wheels
if: matrix.python-version != 'cp37'
uses: pypa/cibuildwheel@v2.15.0
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "

View File

@@ -92,6 +92,14 @@ jobs:
file build/bin/sherpa-onnx
readelf -d build/bin/sherpa-onnx
# Runs the spoken language identification test script. build/bin is prepended
# to PATH and EXE names the executable the script looks up and invokes.
- name: Test spoken language identification
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |
@@ -116,6 +124,7 @@ jobs:
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
shell: bash
run: |

View File

@@ -123,6 +123,15 @@ jobs:
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
path: build/bin/*
# Runs the spoken language identification test script. build/bin is prepended
# to PATH and EXE names the executable the script looks up and invokes.
# Skipped when the matrix build_type is Debug.
- name: Test spoken language identification
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-spoken-language-identification.sh
- name: Test transducer kws
shell: bash
run: |
@@ -140,6 +149,7 @@ jobs:
.github/scripts/test-online-ctc.sh
- name: Test offline Whisper
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH

View File

@@ -102,6 +102,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
# Runs the spoken language identification test script. build/bin is prepended
# to PATH and EXE names the executable the script looks up and invokes.
# Skipped when the matrix build_type is Debug.
- name: Test spoken language identification
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-spoken-language-identification.sh
- name: Test transducer kws
shell: bash
run: |
@@ -135,6 +144,7 @@ jobs:
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH

View File

@@ -68,6 +68,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
# Runs the spoken language identification test script on Windows. The
# build/bin/Release directory is prepended to PATH and EXE carries the .exe
# suffix of the executable the script looks up and invokes.
- name: Test spoken language identification
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-language-identification.exe
.github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |

View File

@@ -68,6 +68,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
# Runs the spoken language identification test script on Windows. The
# build/bin/Release directory is prepended to PATH and EXE carries the .exe
# suffix of the executable the script looks up and invokes.
- name: Test spoken language identification
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-language-identification.exe
.github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |

View File

@@ -69,6 +69,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
# - name: Test spoken language identification
# shell: bash
# run: |
# export PATH=$PWD/build/bin/Release:$PATH
# export EXE=sherpa-onnx-offline-language-identification.exe
#
# .github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |