Begin to support CTC models (#119)
Please see https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/index.html for a list of pre-trained CTC models from NeMo.
This commit is contained in:
47
.github/scripts/test-offline-ctc.sh
vendored
Executable file
47
.github/scripts/test-offline-ctc.sh
vendored
Executable file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
#######################################
# Print a timestamped log line tagged with the caller's location.
# Arguments: message words; they are joined with spaces via "$*"
# Outputs:   "YYYY-mm-dd HH:MM:SS (file:line:function) message" on stdout.
#            echo -e is used, so backslash escapes in the message are expanded.
#######################################
log() {
  # This function is from espnet
  # BASH_SOURCE[1] is the file of whoever called log(); keep only its basename.
  local fname=${BASH_SOURCE[1]##*/}
  # BASH_LINENO[0] is the line of the log call, FUNCNAME[1] the calling function.
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||||
|
||||
# Smoke-test the offline recognizer executable with a NeMo CTC model.
# Required environment:
#   EXE  - name of the offline decoder executable; it must be resolvable
#          through PATH.

# Fail early with a clear message instead of running an empty command below.
: "${EXE:?EXE must be set to the offline recognizer executable}"

echo "EXE is $EXE"
echo "PATH: $PATH"

# `command -v` is the portable replacement for `which`; under `set -e` the
# script stops here when the executable cannot be resolved.
command -v "$EXE"

log "------------------------------------------------------------"
log "Run Citrinet (stt_en_citrinet_512, English)"
log "------------------------------------------------------------"

# Use https directly rather than relying on an http -> https redirect.
repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512
log "Start testing ${repo_url}"
repo=$(basename "$repo_url")
log "Download pretrained model and test-data from $repo_url"

# Clone with LFS smudging disabled, then pull only the ONNX files via LFS.
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
pushd "$repo"
git lfs pull --include "*.onnx"
ls -lh -- *.onnx
popd

# Decode the bundled test waves with model.onnx, then with model.int8.onnx.
for model in model.onnx model.int8.onnx; do
  time "$EXE" \
    --tokens="$repo/tokens.txt" \
    --nemo-ctc-model="$repo/$model" \
    --num-threads=2 \
    "$repo/test_wavs/0.wav" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/8k.wav"
done

# ${repo:?} aborts instead of expanding to an empty path.
rm -rf -- "${repo:?}"
|
||||
38
.github/scripts/test-python.sh
vendored
38
.github/scripts/test-python.sh
vendored
@@ -95,6 +95,8 @@ python3 ./python-api-examples/offline-decode-files.py \
|
||||
|
||||
python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose
|
||||
|
||||
rm -rf $repo
|
||||
|
||||
log "Test non-streaming paraformer models"
|
||||
|
||||
pushd $dir
|
||||
@@ -128,3 +130,39 @@ python3 ./python-api-examples/offline-decode-files.py \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose
|
||||
|
||||
rm -rf $repo
|
||||
|
||||
# Exercise the Python offline decoding example with a NeMo CTC model.
# NOTE(review): $dir and log() are defined earlier in this script, outside
# this section.
log "Test non-streaming NeMo CTC models"

pushd "$dir"
# Use https directly rather than relying on an http -> https redirect.
repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512

log "Start testing ${repo_url}"
repo=$dir/$(basename "$repo_url")
log "Download pretrained model and test-data from $repo_url"

# Clone with LFS smudging disabled, then pull only the ONNX files via LFS.
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
# Plain cd is enough here: the popd below returns to the directory that was
# current before the pushd above.
cd "$repo"
git lfs pull --include "*.onnx"
popd

ls -lh "$repo"

# Decode the bundled test waves with model.onnx, then with model.int8.onnx.
for model in model.onnx model.int8.onnx; do
  python3 ./python-api-examples/offline-decode-files.py \
    --tokens="$repo/tokens.txt" \
    --nemo-ctc="$repo/$model" \
    "$repo/test_wavs/0.wav" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/8k.wav"
done

python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

# ${repo:?} aborts instead of expanding to an empty path.
rm -rf -- "${repo:?}"
|
||||
|
||||
10
.github/workflows/linux.yaml
vendored
10
.github/workflows/linux.yaml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
- '.github/workflows/linux.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -20,6 +21,7 @@ on:
|
||||
- '.github/workflows/linux.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -68,6 +70,14 @@ jobs:
|
||||
file build/bin/sherpa-onnx
|
||||
readelf -d build/bin/sherpa-onnx
|
||||
|
||||
# Workflow step: put build/bin first on PATH, select the offline decoder
# through EXE, and run the CTC test script.
- name: Test offline CTC
  shell: bash
  run: |
    export PATH=$PWD/build/bin:$PATH
    export EXE=sherpa-onnx-offline

    .github/scripts/test-offline-ctc.sh
|
||||
|
||||
- name: Test offline transducer
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
10
.github/workflows/macos.yaml
vendored
10
.github/workflows/macos.yaml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
- '.github/workflows/macos.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -18,6 +19,7 @@ on:
|
||||
- '.github/workflows/macos.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -67,6 +69,14 @@ jobs:
|
||||
otool -L build/bin/sherpa-onnx
|
||||
otool -l build/bin/sherpa-onnx
|
||||
|
||||
# Workflow step: put build/bin first on PATH, select the offline decoder
# through EXE, and run the CTC test script.
- name: Test offline CTC
  shell: bash
  run: |
    export PATH=$PWD/build/bin:$PATH
    export EXE=sherpa-onnx-offline

    .github/scripts/test-offline-ctc.sh
|
||||
|
||||
- name: Test offline transducer
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
10
.github/workflows/windows-x64.yaml
vendored
10
.github/workflows/windows-x64.yaml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
- '.github/workflows/windows-x64.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -18,6 +19,7 @@ on:
|
||||
- '.github/workflows/windows-x64.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -73,6 +75,14 @@ jobs:
|
||||
|
||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||
|
||||
# Workflow step: put build/bin/Release first on PATH, select the offline
# decoder (.exe) through EXE, and run the CTC test script.
- name: Test offline CTC for windows x64
  shell: bash
  run: |
    export PATH=$PWD/build/bin/Release:$PATH
    export EXE=sherpa-onnx-offline.exe

    .github/scripts/test-offline-ctc.sh
|
||||
|
||||
- name: Test offline transducer for Windows x64
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
11
.github/workflows/windows-x86.yaml
vendored
11
.github/workflows/windows-x86.yaml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
- '.github/workflows/windows-x86.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -18,6 +19,7 @@ on:
|
||||
- '.github/workflows/windows-x86.yaml'
|
||||
- '.github/scripts/test-online-transducer.sh'
|
||||
- '.github/scripts/test-offline-transducer.sh'
|
||||
- '.github/scripts/test-offline-ctc.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -31,6 +33,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
windows_x86:
|
||||
if: false # disable windows x86 CI for now
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: ${{ matrix.vs-version }}
|
||||
strategy:
|
||||
@@ -73,6 +76,14 @@ jobs:
|
||||
|
||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||
|
||||
# Workflow step: put build/bin/Release first on PATH, select the offline
# decoder (.exe) through EXE, and run the CTC test script.
- name: Test offline CTC for windows x86
  shell: bash
  run: |
    export PATH=$PWD/build/bin/Release:$PATH
    export EXE=sherpa-onnx-offline.exe

    .github/scripts/test-offline-ctc.sh
|
||||
|
||||
- name: Test offline transducer for Windows x86
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
Reference in New Issue
Block a user