Add C++ and Python API for Dolphin CTC models (#2085)

This commit is contained in:
Fangjun Kuang
2025-04-02 19:09:00 +08:00
committed by GitHub
parent 1316719e23
commit 0de7e1b9f0
27 changed files with 671 additions and 26 deletions

View File

@@ -15,6 +15,39 @@ echo "PATH: $PATH"
which $EXE
# Smoke-test the Dolphin CTC models for each model size. For every size the
# int8 archive (model.int8.onnx) is tried first, then the archive that ships
# model.onnx. Each archive is downloaded, unpacked, decoded against its own
# test_wavs/0.wav, and deleted again before the next one is fetched.
#
# $EXE is not set in this loop; it is expected to come from the caller's
# environment. It is deliberately left unquoted so its expansion is unchanged.
for type in base small; do
  asr_models_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
  dolphin_int8=sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
  dolphin_plain=sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type int8)"
  log "------------------------------------------------------------"
  curl -SL -O "$asr_models_url/$dolphin_int8.tar.bz2"
  tar xvf "$dolphin_int8.tar.bz2"
  rm "$dolphin_int8.tar.bz2"

  $EXE \
    --dolphin-model="./$dolphin_int8/model.int8.onnx" \
    --tokens="./$dolphin_int8/tokens.txt" \
    --debug=1 \
    "./$dolphin_int8/test_wavs/0.wav"

  # Remove the unpacked model before downloading the next archive.
  rm -rf "$dolphin_int8"

  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type)"
  log "------------------------------------------------------------"
  curl -SL -O "$asr_models_url/$dolphin_plain.tar.bz2"
  tar xvf "$dolphin_plain.tar.bz2"
  rm "$dolphin_plain.tar.bz2"

  $EXE \
    --dolphin-model="./$dolphin_plain/model.onnx" \
    --tokens="./$dolphin_plain/tokens.txt" \
    --debug=1 \
    "./$dolphin_plain/test_wavs/0.wav"

  rm -rf "$dolphin_plain"
done
log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models"
log "------------------------------------------------------------"

View File

@@ -8,6 +8,15 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
# Dolphin CTC check for the Python API: fetch the base int8 model archive,
# unpack it, run the example decoder script, then remove the model directory.
log "test offline dolphin ctc"
dolphin_model=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$dolphin_model.tar.bz2"
tar xvf "$dolphin_model.tar.bz2"
rm "$dolphin_model.tar.bz2"

# The example is invoked without arguments.
# NOTE(review): presumably it locates the unpacked model directory in the
# current working directory by itself -- confirm against the script.
python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py

rm -rf "$dolphin_model"
log "test offline speech enhancement (GTCRN)"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx

View File

@@ -0,0 +1,48 @@
# Manually triggered workflow. For each model size in the matrix it clones the
# two Dolphin CTC model repositories (int8 and non-int8) from Hugging Face,
# packs each one into a .tar.bz2 archive, and uploads the archives to the
# `asr-models` release tag of k2-fsa/sherpa-onnx.
name: export-dolphin-ctc-to-onnx

on:
  workflow_dispatch:

# A newer run for the same ref cancels the one still in progress.
concurrency:
  group: export-dolphin-ctc-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-dolphin-ctc-to-onnx:
    # Only run for these two repository owners; other forks skip the job.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      # One model size failing must not cancel the other.
      fail-fast: false
      matrix:
        os: [macos-latest]
        model_type: [small, base]

    steps:
      - uses: actions/checkout@v4

      # Clone both model repositories for this size, strip the git metadata
      # so it does not end up in the archives, then create one .tar.bz2 per
      # repository.
      - name: Download ${{ matrix.model_type }}
        shell: bash
        run: |
          git lfs install
          type=${{ matrix.model_type }}
          git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
          git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
          rm -rf sherpa-onnx-dolphin-*/.git*
          ls -lha sherpa-onnx-dolphin-*/
          tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
          tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

      # Upload every archive produced above; existing assets with the same
      # name are overwritten.
      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

View File

@@ -205,6 +205,16 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2
# Step: run .github/scripts/test-offline-ctc.sh with $PWD/build/bin prepended
# to PATH and EXE set to sherpa-onnx-offline. `du -h -d1 .` prints the disk
# usage of the working directory before and after the test script.
# NOTE(review): the hunk header (-205,6 +205,16) suggests this is the step's
# new position in the workflow -- confirm.
- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
du -h -d1 .
- name: Test offline speech denoiser
shell: bash
run: |
@@ -249,16 +259,6 @@ jobs:
.github/scripts/test-offline-moonshine.sh
du -h -d1 .
# Step: run .github/scripts/test-offline-ctc.sh with $PWD/build/bin prepended
# to PATH and EXE set to sherpa-onnx-offline. `du -h -d1 .` prints the disk
# usage of the working directory before and after the test script.
# NOTE(review): the hunk header (-249,16 +259,6) suggests these lines are the
# step's previous position, removed by this commit -- confirm.
- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
du -h -d1 .
- name: Test C++ API
shell: bash
run: |

View File

@@ -162,6 +162,14 @@ jobs:
overwrite: true
file: sherpa-onnx-*osx-universal2*.tar.bz2
# Step: run .github/scripts/test-offline-ctc.sh with $PWD/build/bin prepended
# to PATH and EXE set to sherpa-onnx-offline.
# NOTE(review): the hunk header (-162,6 +162,14) suggests this is the step's
# new position in the workflow -- confirm.
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test offline speech denoiser
shell: bash
run: |
@@ -226,14 +234,6 @@ jobs:
.github/scripts/test-online-punctuation.sh
# Step: run .github/scripts/test-offline-ctc.sh with $PWD/build/bin prepended
# to PATH and EXE set to sherpa-onnx-offline.
# NOTE(review): the hunk header (-226,14 +234,6) suggests these lines are the
# step's previous position, removed by this commit -- confirm.
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test online CTC
shell: bash
run: |