Add C++ support for non-streaming NeMo fast conformer hybrid transducer ctc (the ctc branch) (#848)

This commit is contained in:
Fangjun Kuang
2024-05-09 15:32:22 +08:00
committed by GitHub
parent 5ed3ec1c04
commit 5d8c35e44e
8 changed files with 156 additions and 34 deletions

View File

@@ -13,14 +13,111 @@ echo "PATH: $PATH"
which $EXE
log "-----------------------------------------------------------------"
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
log "-----------------------------------------------------------------"
# NeMo fast conformer CTC model, multilingual release
# (be-de-en-es-fr-hr-it-pl-ru-uk-20k, as named in the URL): download the
# archive, unpack it, decode each listed test wave, then delete the model dir.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
name=${url##*/}        # archive file name: last path component of the URL
repo=${name%.tar.bz2}  # directory name the rest of this section reads from

# -f makes curl fail on an HTTP error instead of saving the error page under
# the archive name, which would only surface later as a confusing tar error.
curl -fSL -O "$url"
tar xvf "$name"
rm -- "$name"

ls -lh "$repo"

log "test $repo"

test_wavs=(
  de-german.wav
  es-spanish.wav
  hr-croatian.wav
  po-polish.wav
  uk-ukrainian.wav
  en-english.wav
  fr-french.wav
  it-italian.wav
  ru-russian.wav
)

for w in "${test_wavs[@]}"; do
  # NOTE(review): $EXE is left unquoted, as in the original, in case the caller
  # exports a multi-word command; quote it if that can never happen.
  time $EXE \
    --tokens="$repo/tokens.txt" \
    --nemo-ctc-model="$repo/model.onnx" \
    --debug=1 \
    "$repo/test_wavs/$w"
done

# ${repo:?} aborts rather than running rm -rf with an empty argument.
rm -rf -- "${repo:?}"
# NeMo fast conformer CTC model, en-24500 release (as named in the URL):
# download the archive, unpack it, decode the English test wave, then delete
# the model dir.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
name=${url##*/}        # archive file name: last path component of the URL
repo=${name%.tar.bz2}  # directory name the rest of this section reads from

# -f makes curl fail on an HTTP error instead of saving the error page under
# the archive name, which would only surface later as a confusing tar error.
curl -fSL -O "$url"
tar xvf "$name"
rm -- "$name"

ls -lh "$repo"

log "Test $repo"

# NOTE(review): $EXE is left unquoted, as in the original, in case the caller
# exports a multi-word command; quote it if that can never happen.
time $EXE \
  --tokens="$repo/tokens.txt" \
  --nemo-ctc-model="$repo/model.onnx" \
  --debug=1 \
  "$repo/test_wavs/en-english.wav"

# ${repo:?} aborts rather than running rm -rf with an empty argument.
rm -rf -- "${repo:?}"
# NeMo fast conformer CTC model, es-1424 release (as named in the URL):
# download the archive, unpack it, decode the Spanish test wave, then delete
# the model dir.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
name=${url##*/}        # archive file name: last path component of the URL
repo=${name%.tar.bz2}  # directory name the rest of this section reads from

# -f makes curl fail on an HTTP error instead of saving the error page under
# the archive name, which would only surface later as a confusing tar error.
curl -fSL -O "$url"
tar xvf "$name"
rm -- "$name"

ls -lh "$repo"

log "test $repo"

# NOTE(review): $EXE is left unquoted, as in the original, in case the caller
# exports a multi-word command; quote it if that can never happen.
time $EXE \
  --tokens="$repo/tokens.txt" \
  --nemo-ctc-model="$repo/model.onnx" \
  --debug=1 \
  "$repo/test_wavs/es-spanish.wav"

# ${repo:?} aborts rather than running rm -rf with an empty argument.
rm -rf -- "${repo:?}"
# NeMo fast conformer CTC model, en-de-es-fr-14288 release (as named in the
# URL): download the archive, unpack it, decode each listed test wave, then
# delete the model dir.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
name=${url##*/}        # archive file name: last path component of the URL
repo=${name%.tar.bz2}  # directory name the rest of this section reads from

# -f makes curl fail on an HTTP error instead of saving the error page under
# the archive name, which would only surface later as a confusing tar error.
curl -fSL -O "$url"
tar xvf "$name"
rm -- "$name"

ls -lh "$repo"

log "Test $repo"

test_wavs=(
  en-english.wav
  de-german.wav
  fr-french.wav
  es-spanish.wav
)

for w in "${test_wavs[@]}"; do
  # NOTE(review): $EXE is left unquoted, as in the original, in case the caller
  # exports a multi-word command; quote it if that can never happen.
  time $EXE \
    --tokens="$repo/tokens.txt" \
    --nemo-ctc-model="$repo/model.onnx" \
    --debug=1 \
    "$repo/test_wavs/$w"
done

# ${repo:?} aborts rather than running rm -rf with an empty argument.
rm -rf -- "${repo:?}"
log "------------------------------------------------------------"
log "Run Wenet models"
log "------------------------------------------------------------"
wenet_models=(
sherpa-onnx-zh-wenet-aishell
sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-wenetspeech
sherpa-onnx-zh-wenet-multi-cn
# sherpa-onnx-zh-wenet-multi-cn
sherpa-onnx-en-wenet-librispeech
# sherpa-onnx-en-wenet-gigaspeech
)

View File

@@ -62,6 +62,11 @@ for wav in ${waves[@]}; do
ls -lh *.wav
done
# Fetch and unpack the shared test-wave archive, then drop the tarball.
# $data names the unpacked directory; the archive is "$data.tar.bz2".
data=spoken-language-identification-test-wavs
curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${data}.tar.bz2"
tar xvf "${data}.tar.bz2"
rm "${data}.tar.bz2"
for name in ${names[@]}; do
log "------------------------------------------------------------"
log "Run $name"
@@ -85,14 +90,14 @@ for name in ${names[@]}; do
time $EXE \
--whisper-encoder=$repo/${name}-encoder.onnx \
--whisper-decoder=$repo/${name}-decoder.onnx \
$wav
$data/$wav
log "test int8 onnx"
time $EXE \
--whisper-encoder=$repo/${name}-encoder.int8.onnx \
--whisper-decoder=$repo/${name}-decoder.int8.onnx \
$wav
$data/$wav
done
rm -rf $repo
done

View File

@@ -128,13 +128,13 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: install/*
- name: Test offline punctuation
- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-offline-punctuation.sh
.github/scripts/test-spoken-language-identification.sh
- name: Test C API
shell: bash
@@ -147,13 +147,13 @@ jobs:
.github/scripts/test-c-api.sh
- name: Test Audio tagging
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-audio-tagging
export EXE=sherpa-onnx-offline
.github/scripts/test-audio-tagging.sh
.github/scripts/test-offline-ctc.sh
- name: Test online CTC
shell: bash
@@ -163,14 +163,21 @@ jobs:
.github/scripts/test-online-ctc.sh
- name: Test spoken language identification (C++ API)
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-spoken-language-identification.sh
.github/scripts/test-offline-punctuation.sh
- name: Test Audio tagging
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-audio-tagging
.github/scripts/test-audio-tagging.sh
- name: Test transducer kws
shell: bash
@@ -180,7 +187,6 @@ jobs:
.github/scripts/test-kws.sh
- name: Test offline Whisper
if: matrix.build_type != 'Debug'
shell: bash
@@ -192,14 +198,6 @@ jobs:
.github/scripts/test-offline-whisper.sh
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash

View File

@@ -107,6 +107,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test online CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-ctc.sh
- name: Test offline punctuation
shell: bash
run: |
@@ -150,14 +158,6 @@ jobs:
.github/scripts/test-kws.sh
- name: Test online CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-ctc.sh
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash