Compare commits
10 Commits
0e738c356c
...
37ad87e75c
| Author | SHA1 | Date | |
|---|---|---|---|
| 37ad87e75c | |||
|
|
0d44df9b67 | ||
|
|
fd9a687ec2 | ||
|
|
e2b2d5ea57 | ||
|
|
f0960342ad | ||
|
|
6122a678f5 | ||
|
|
f1405779cf | ||
|
|
831aff187d | ||
|
|
103e93d9f6 | ||
|
|
df4615ca1d |
5
.github/scripts/test-dot-net.sh
vendored
5
.github/scripts/test-dot-net.sh
vendored
@@ -6,6 +6,11 @@ cd ./version-test
|
||||
./run.sh
|
||||
ls -lh
|
||||
|
||||
cd ../non-streaming-canary-decode-files
|
||||
./run.sh
|
||||
ls -lh
|
||||
rm -rf sherpa-onnx-nemo-*
|
||||
|
||||
cd ../offline-decode-files
|
||||
|
||||
./run-zipformer-ctc.sh
|
||||
|
||||
10
.github/scripts/test-nodejs-addon-npm.sh
vendored
10
.github/scripts/test-nodejs-addon-npm.sh
vendored
@@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()")
|
||||
platform=$(node -p "require('os').platform()")
|
||||
node_version=$(node -p "process.versions.node.split('.')[0]")
|
||||
|
||||
echo "----------non-streaming ASR NeMo Canary----------"
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_nemo_canary.js
|
||||
|
||||
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
|
||||
|
||||
echo "----------non-streaming ASR Zipformer CTC----------"
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
|
||||
|
||||
8
.github/scripts/test-nodejs-npm.sh
vendored
8
.github/scripts/test-nodejs-npm.sh
vendored
@@ -9,6 +9,14 @@ git status
|
||||
ls -lh
|
||||
ls -lh node_modules
|
||||
|
||||
# asr with offline nemo canary
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
|
||||
node ./test-offline-nemo-canary.js
|
||||
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
|
||||
|
||||
# asr with offline zipformer ctc
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
|
||||
|
||||
34
.github/scripts/test-offline-transducer.sh
vendored
34
.github/scripts/test-offline-transducer.sh
vendored
@@ -281,7 +281,39 @@ time $EXE \
|
||||
$repo/test_wavs/1.wav \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
rm -rf $repo
|
||||
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
||||
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
||||
lm_repo=$(basename $lm_repo_url)
|
||||
pushd $lm_repo
|
||||
git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
|
||||
popd
|
||||
|
||||
bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
|
||||
log "Download bi-gram LM from ${bigram_repo_url}"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
|
||||
bigramlm_repo=$(basename $bigram_repo_url)
|
||||
pushd $bigramlm_repo
|
||||
git lfs pull --include "2gram.fst"
|
||||
popd
|
||||
|
||||
log "Start testing with LM and bi-gram LODR"
|
||||
# TODO: find test examples that change with the LODR
|
||||
time $EXE \
|
||||
--tokens=$repo/tokens.txt \
|
||||
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
|
||||
--num-threads=2 \
|
||||
--decoding_method="modified_beam_search" \
|
||||
--lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
|
||||
--lodr-fst=$bigramlm_repo/2gram.fst \
|
||||
--lodr-scale=-0.5 \
|
||||
$repo/test_wavs/0.wav \
|
||||
$repo/test_wavs/1.wav \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
rm -rf $repo $lm_repo $bigramlm_repo
|
||||
|
||||
log "------------------------------------------------------------"
|
||||
log "Run Paraformer (Chinese)"
|
||||
|
||||
10
.github/scripts/test-online-ctc.sh
vendored
10
.github/scripts/test-online-ctc.sh
vendored
@@ -77,16 +77,6 @@ time $EXE \
|
||||
$repo/test_wavs/DEV_T0000000001.wav \
|
||||
$repo/test_wavs/DEV_T0000000002.wav
|
||||
|
||||
log "test int8"
|
||||
|
||||
time $EXE \
|
||||
--debug=1 \
|
||||
--zipformer2-ctc-model=$repo/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
|
||||
--tokens=$repo/tokens.txt \
|
||||
$repo/test_wavs/DEV_T0000000000.wav \
|
||||
$repo/test_wavs/DEV_T0000000001.wav \
|
||||
$repo/test_wavs/DEV_T0000000002.wav
|
||||
|
||||
rm -rf $repo
|
||||
|
||||
log "------------------------------------------------------------"
|
||||
|
||||
55
.github/scripts/test-online-transducer.sh
vendored
55
.github/scripts/test-online-transducer.sh
vendored
@@ -174,7 +174,60 @@ for wave in ${waves[@]}; do
|
||||
$wave
|
||||
done
|
||||
|
||||
rm -rf $repo
|
||||
lm_repo_url=https://huggingface.co/vsd-vector/icefall-librispeech-rnn-lm
|
||||
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
||||
lm_repo=$(basename $lm_repo_url)
|
||||
pushd $lm_repo
|
||||
git lfs pull --include "with-state-epoch-99-avg-1.onnx"
|
||||
popd
|
||||
|
||||
bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
|
||||
log "Download bi-gram LM from ${bigram_repo_url}"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
|
||||
bigramlm_repo=$(basename $bigram_repo_url)
|
||||
pushd $bigramlm_repo
|
||||
git lfs pull --include "2gram.fst"
|
||||
popd
|
||||
|
||||
log "Start testing LODR"
|
||||
|
||||
waves=(
|
||||
$repo/test_wavs/0.wav
|
||||
$repo/test_wavs/1.wav
|
||||
$repo/test_wavs/8k.wav
|
||||
)
|
||||
|
||||
for wave in ${waves[@]}; do
|
||||
time $EXE \
|
||||
--tokens=$repo/tokens.txt \
|
||||
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
|
||||
--num-threads=2 \
|
||||
--decoding_method="modified_beam_search" \
|
||||
--lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
|
||||
--lodr-fst=$bigramlm_repo/2gram.fst \
|
||||
--lodr-scale=-0.5 \
|
||||
$wave
|
||||
done
|
||||
|
||||
for wave in ${waves[@]}; do
|
||||
time $EXE \
|
||||
--tokens=$repo/tokens.txt \
|
||||
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
|
||||
--num-threads=2 \
|
||||
--decoding_method="modified_beam_search" \
|
||||
--lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
|
||||
--lodr-fst=$bigramlm_repo/2gram.fst \
|
||||
--lodr-scale=-0.5 \
|
||||
--lm-shallow-fusion=true \
|
||||
$wave
|
||||
done
|
||||
|
||||
rm -rf $repo $bigramlm_repo $lm_repo
|
||||
|
||||
log "------------------------------------------------------------"
|
||||
log "Run streaming Zipformer transducer (Bilingual, Chinese + English)"
|
||||
|
||||
32
.github/scripts/test-python.sh
vendored
32
.github/scripts/test-python.sh
vendored
@@ -562,9 +562,39 @@ python3 ./python-api-examples/offline-decode-files.py \
|
||||
$repo/test_wavs/1.wav \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
||||
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
||||
lm_repo=$(basename $lm_repo_url)
|
||||
pushd $lm_repo
|
||||
git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
|
||||
popd
|
||||
|
||||
bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
|
||||
log "Download bi-gram LM from ${bigram_repo_url}"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
|
||||
bigramlm_repo=$(basename $bigram_repo_url)
|
||||
pushd $bigramlm_repo
|
||||
git lfs pull --include "2gram.fst"
|
||||
popd
|
||||
|
||||
log "Perform offline decoding with RNN-LM and LODR"
|
||||
python3 ./python-api-examples/offline-decode-files.py \
|
||||
--tokens=$repo/tokens.txt \
|
||||
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
|
||||
--decoding-method=modified_beam_search \
|
||||
--lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
|
||||
--lodr-fst=$bigramlm_repo/2gram.fst \
|
||||
--lodr-scale=-0.5 \
|
||||
$repo/test_wavs/0.wav \
|
||||
$repo/test_wavs/1.wav \
|
||||
$repo/test_wavs/8k.wav
|
||||
|
||||
python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose
|
||||
|
||||
rm -rf $repo
|
||||
rm -rf $repo $lm_repo $bigramlm_repo
|
||||
|
||||
log "Test non-streaming paraformer models"
|
||||
|
||||
|
||||
30
.github/workflows/c-api.yaml
vendored
30
.github/workflows/c-api.yaml
vendored
@@ -127,6 +127,36 @@ jobs:
|
||||
rm -rf dict lexicon.txt test-hr.wav replace.fst
|
||||
rm -v $name
|
||||
|
||||
- name: Test NeMo Canary
|
||||
shell: bash
|
||||
run: |
|
||||
name=nemo-canary-c-api
|
||||
gcc -o $name ./c-api-examples/$name.c \
|
||||
-I ./build/install/include \
|
||||
-L ./build/install/lib/ \
|
||||
-l sherpa-onnx-c-api \
|
||||
-l onnxruntime
|
||||
|
||||
ls -lh $name
|
||||
|
||||
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
|
||||
ldd ./$name
|
||||
echo "----"
|
||||
readelf -d ./$name
|
||||
fi
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
|
||||
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
|
||||
|
||||
./$name
|
||||
|
||||
rm $name
|
||||
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
|
||||
|
||||
- name: Test Dolphin CTC
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
34
.github/workflows/cxx-api.yaml
vendored
34
.github/workflows/cxx-api.yaml
vendored
@@ -87,6 +87,40 @@ jobs:
|
||||
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
|
||||
fi
|
||||
|
||||
- name: Test NeMo Canary
|
||||
shell: bash
|
||||
run: |
|
||||
name=nemo-canary-cxx-api
|
||||
g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
|
||||
-I ./build/install/include \
|
||||
-L ./build/install/lib/ \
|
||||
-l sherpa-onnx-cxx-api \
|
||||
-l sherpa-onnx-c-api \
|
||||
-l onnxruntime
|
||||
|
||||
ls -lh $name
|
||||
|
||||
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
|
||||
ldd ./$name
|
||||
echo "----"
|
||||
readelf -d ./$name
|
||||
fi
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
|
||||
ls -lh sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
|
||||
echo "---"
|
||||
|
||||
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
|
||||
|
||||
./$name
|
||||
|
||||
rm -rf sherpa-onnx-nemo-canary-*
|
||||
rm -v ./$name
|
||||
|
||||
- name: Test streaming zipformer with Homophone replacer
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
@@ -61,6 +61,11 @@ jobs:
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
|
||||
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
|
||||
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
|
||||
)
|
||||
|
||||
for m in ${models[@]}; do
|
||||
@@ -89,6 +94,11 @@ jobs:
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
|
||||
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
|
||||
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
|
||||
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
|
||||
)
|
||||
for d in ${dirs[@]}; do
|
||||
tar cjvf ${d}.tar.bz2 ./$d
|
||||
|
||||
@@ -54,13 +54,18 @@ jobs:
|
||||
curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
|
||||
popd
|
||||
|
||||
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
|
||||
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
|
||||
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
|
||||
|
||||
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
|
||||
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
|
||||
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
|
||||
names=(
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
|
||||
)
|
||||
for d in ${names[@]}; do
|
||||
cp -av test_wavs $d/
|
||||
tar cjvf $d.tar.bz2 $d
|
||||
done
|
||||
|
||||
- name: Release
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
@@ -71,3 +76,41 @@ jobs:
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: asr-models
|
||||
|
||||
- name: Publish to huggingface
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
models=(
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
|
||||
)
|
||||
|
||||
for m in ${models[@]}; do
|
||||
rm -rf huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
|
||||
cp -av $m/* huggingface
|
||||
cd huggingface
|
||||
git lfs track "*.onnx"
|
||||
git lfs track "*.wav"
|
||||
git status
|
||||
git add .
|
||||
git status
|
||||
git commit -m "first commit"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
|
||||
cd ..
|
||||
done
|
||||
|
||||
@@ -61,6 +61,11 @@ jobs:
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
|
||||
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
|
||||
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
|
||||
)
|
||||
|
||||
for m in ${models[@]}; do
|
||||
@@ -88,6 +93,11 @@ jobs:
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
|
||||
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
|
||||
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
|
||||
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
|
||||
)
|
||||
for d in ${dirs[@]}; do
|
||||
tar cjvf ${d}.tar.bz2 ./$d
|
||||
|
||||
@@ -54,13 +54,18 @@ jobs:
|
||||
curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
|
||||
popd
|
||||
|
||||
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
|
||||
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
|
||||
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
|
||||
|
||||
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
|
||||
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
|
||||
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
|
||||
models=(
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
|
||||
)
|
||||
for m in ${models[@]}; do
|
||||
cp -av test_wavs $m
|
||||
tar cjvf $m.tar.bz2 $m
|
||||
done
|
||||
|
||||
- name: Release
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
@@ -71,3 +76,41 @@ jobs:
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: asr-models
|
||||
|
||||
- name: Publish to huggingface
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
models=(
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
|
||||
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
|
||||
)
|
||||
|
||||
for m in ${models[@]}; do
|
||||
rm -rf huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
|
||||
cp -av $m/* huggingface
|
||||
cd huggingface
|
||||
git lfs track "*.onnx"
|
||||
git lfs track "*.wav"
|
||||
git status
|
||||
git add .
|
||||
git status
|
||||
git commit -m "first commit"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
|
||||
cd ..
|
||||
done
|
||||
|
||||
105
.github/workflows/export-nemo-parakeet-tdt.yaml
vendored
Normal file
105
.github/workflows/export-nemo-parakeet-tdt.yaml
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
name: export-nemo-parakeet-tdt
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- refactor-export-nemo
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: export-nemo-parakeet-tdt-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
export-nemo-parakeet-tdt-0_6b-v2:
|
||||
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
|
||||
name: parakeet tdt
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest]
|
||||
python-version: ["3.10"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install python dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
pip install \
|
||||
nemo_toolkit['asr'] \
|
||||
"numpy<2" \
|
||||
ipython \
|
||||
kaldi-native-fbank \
|
||||
librosa \
|
||||
onnx==1.17.0 \
|
||||
onnxmltools==1.13.0 \
|
||||
onnxruntime==1.17.1 \
|
||||
soundfile
|
||||
|
||||
- name: Run
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/nemo/parakeet-tdt_ctc-0.6b-ja
|
||||
./run-ctc.sh
|
||||
|
||||
- name: Collect files
|
||||
shell: bash
|
||||
run: |
|
||||
models=(
|
||||
sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
|
||||
)
|
||||
for m in ${models[@]}; do
|
||||
mv -v scripts/nemo/parakeet-tdt_ctc-0.6b-ja/$m .
|
||||
tar cjfv $m.tar.bz2 $m
|
||||
done
|
||||
|
||||
|
||||
- name: Publish to huggingface
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
models=(
|
||||
sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
|
||||
)
|
||||
|
||||
for m in ${models[@]}; do
|
||||
rm -rf huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
|
||||
cp -av $m/* huggingface
|
||||
cd huggingface
|
||||
git lfs track "*.onnx"
|
||||
git lfs track "*.wav"
|
||||
git status
|
||||
git add .
|
||||
git status
|
||||
git commit -m "first commit"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
|
||||
cd ..
|
||||
done
|
||||
|
||||
- name: Release
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
file_glob: true
|
||||
file: ./*.tar.bz2
|
||||
overwrite: true
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: asr-models
|
||||
4
.github/workflows/pascal.yaml
vendored
4
.github/workflows/pascal.yaml
vendored
@@ -156,6 +156,10 @@ jobs:
|
||||
|
||||
pushd non-streaming-asr
|
||||
|
||||
./run-nemo-canary.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
echo "---"
|
||||
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
echo "---"
|
||||
|
||||
7
.github/workflows/run-java-test.yaml
vendored
7
.github/workflows/run-java-test.yaml
vendored
@@ -117,6 +117,13 @@ jobs:
|
||||
cd ./java-api-examples
|
||||
./run-version-test.sh
|
||||
|
||||
- name: Run java test (Nemo Canary)
|
||||
shell: bash
|
||||
run: |
|
||||
cd ./java-api-examples
|
||||
./run-non-streaming-decode-file-nemo-canary.sh
|
||||
rm -rf sherpa-onnx-nemo-*
|
||||
|
||||
- name: Run java test (Non-streaming SenseVoice with homophone replacer)
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
8
.github/workflows/test-go-package.yaml
vendored
8
.github/workflows/test-go-package.yaml
vendored
@@ -76,6 +76,14 @@ jobs:
|
||||
run: |
|
||||
gcc --version
|
||||
|
||||
- name: Test NeMo Canary ASR
|
||||
if: matrix.os != 'windows-latest'
|
||||
shell: bash
|
||||
run: |
|
||||
cd go-api-examples/non-streaming-canary-decode-files
|
||||
./run.sh
|
||||
rm -rf sherpa-onnx-nemo-*
|
||||
|
||||
- name: Test speech enhancement (GTCRN)
|
||||
if: matrix.os != 'windows-latest'
|
||||
shell: bash
|
||||
|
||||
14
.github/workflows/test-go.yaml
vendored
14
.github/workflows/test-go.yaml
vendored
@@ -108,6 +108,7 @@ jobs:
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
|
||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
|
||||
@@ -148,6 +149,19 @@ jobs:
|
||||
name: ${{ matrix.os }}-libs
|
||||
path: to-upload/
|
||||
|
||||
- name: Test non-streaming decoding files with NeMo Canary
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/non-streaming-canary-decode-files/
|
||||
ls -lh
|
||||
go mod tidy
|
||||
cat go.mod
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
./run.sh
|
||||
rm -rf sherpa-onnx-nemo-*
|
||||
|
||||
- name: Test streaming decoding files
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
38
.github/workflows/upload-models.yaml
vendored
38
.github/workflows/upload-models.yaml
vendored
@@ -24,7 +24,45 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: git config
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
- name: FireRed ASR fp16
|
||||
shell: bash
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
run: |
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 hf
|
||||
|
||||
git lfs install
|
||||
git clone https://www.modelscope.cn/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16.git ms
|
||||
|
||||
d=sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16
|
||||
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d
|
||||
mv -v hf/test_wavs $d
|
||||
mv -v hf/README.md $d
|
||||
mv -v hf/tokens.txt $d
|
||||
mv -v ms/*.onnx $d
|
||||
|
||||
pushd $d
|
||||
git lfs track "*.onnx"
|
||||
git lfs track "*.wav"
|
||||
git status
|
||||
git add .
|
||||
git commit -m "add models"
|
||||
ls -lh
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
|
||||
popd
|
||||
|
||||
rm -rf $d/.git
|
||||
rm -rf $d/.gitattributes
|
||||
tar cjvf $d.tar.bz2 $d
|
||||
|
||||
- name: Zipformer CTC (non-streaming)
|
||||
if: false
|
||||
shell: bash
|
||||
run: |
|
||||
git lfs install
|
||||
|
||||
19
CHANGELOG.md
19
CHANGELOG.md
@@ -1,3 +1,22 @@
|
||||
## 1.12.5
|
||||
|
||||
* Fix typo CMAKE_EXECUTBLE_LINKER_FLAGS -> CMAKE_EXECUTABLE_LINKER_FLAGS (#2344)
|
||||
* Fix testing dart packages (#2345)
|
||||
* fix(canary): use dynamo export, single input_ids and avoid 0/1 specialization (#2348)
|
||||
* Fix TTS for Unreal Engine (#2349)
|
||||
* Update readme to include https://github.com/mawwalker/stt-server (#2350)
|
||||
* Add meta data to NeMo canary ONNX models (#2351)
|
||||
* Update README to include https://github.com/bbeyondllove/asr_server (#2353)
|
||||
* Add C++ runtime and Python API for NeMo Canary models (#2352)
|
||||
* Add C/CXX/JavaScript API for NeMo Canary models (#2357)
|
||||
* Add Java and Kotlin API for NeMo Canary models (#2359)
|
||||
* Upload fp16 onnx model files for FireRedASR (#2360)
|
||||
* Fix nemo feature normalization in test code (#2361)
|
||||
* Refactor exporting NeMo models (#2362)
|
||||
* Add LODR support to online and offline recognizers (#2026)
|
||||
* Add CXX examples for NeMo TDT ASR. (#2363)
|
||||
* Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)
|
||||
|
||||
## 1.12.4
|
||||
|
||||
* Refactor release scripts. (#2323)
|
||||
|
||||
@@ -14,7 +14,7 @@ project(sherpa-onnx)
|
||||
# Remember to update
|
||||
# ./CHANGELOG.md
|
||||
# ./new-release.sh
|
||||
set(SHERPA_ONNX_VERSION "1.12.4")
|
||||
set(SHERPA_ONNX_VERSION "1.12.5")
|
||||
|
||||
# Disable warning about
|
||||
#
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
git clone https://github.com/k2-fsa/sherpa-onnx
|
||||
cd sherpa-onnx
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-v1.12.4-android.tar.bz2
|
||||
tar xvf sherpa-onnx-v1.12.4-android.tar.bz2
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-v1.12.5-android.tar.bz2
|
||||
tar xvf sherpa-onnx-v1.12.5-android.tar.bz2
|
||||
|
||||
cp -v jniLibs/arm64-v8a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/
|
||||
cp -v jniLibs/armeabi-v7a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/
|
||||
@@ -16,5 +16,5 @@ cd android/SherpaOnnxAar
|
||||
|
||||
./gradlew :sherpa_onnx:assembleRelease
|
||||
ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
|
||||
cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.12.4.aar
|
||||
cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.12.5.aar
|
||||
```
|
||||
|
||||
@@ -34,5 +34,5 @@ dependencies {
|
||||
implementation 'pub.devrel:easypermissions:3.0.0'
|
||||
implementation 'androidx.core:core-ktx:1.7.0'
|
||||
// implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar')
|
||||
implementation 'com.github.k2-fsa:sherpa-onnx:v1.12.4'
|
||||
implementation 'com.github.k2-fsa:sherpa-onnx:v1.12.5'
|
||||
}
|
||||
|
||||
@@ -242,7 +242,7 @@ for d in ios-arm64_x86_64-simulator ios-arm64; do
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>FMWK</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.12.4</string>
|
||||
<string>1.12.5</string>
|
||||
<key>CFBundleSupportedPlatforms</key>
|
||||
<array>
|
||||
<string>iPhoneOS</string>
|
||||
|
||||
@@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
|
||||
add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)
|
||||
target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)
|
||||
|
||||
add_executable(nemo-canary-c-api nemo-canary-c-api.c)
|
||||
target_link_libraries(nemo-canary-c-api sherpa-onnx-c-api)
|
||||
|
||||
add_executable(sense-voice-c-api sense-voice-c-api.c)
|
||||
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
|
||||
|
||||
|
||||
115
c-api-examples/nemo-canary-c-api.c
Normal file
115
c-api-examples/nemo-canary-c-api.c
Normal file
@@ -0,0 +1,115 @@
|
||||
// c-api-examples/nemo-canary-c-api.c
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
// We assume you have pre-downloaded the NeMo Canary model
|
||||
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
// An example is given below:
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
//
|
||||
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
|
||||
// for details
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sherpa-onnx/c-api/c-api.h"
|
||||
|
||||
int32_t main() {
|
||||
const char *wav_filename =
|
||||
"./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
|
||||
const char *encoder_filename =
|
||||
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
|
||||
const char *decoder_filename =
|
||||
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
|
||||
const char *tokens_filename =
|
||||
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
|
||||
const char *provider = "cpu";
|
||||
|
||||
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
|
||||
if (wave == NULL) {
|
||||
fprintf(stderr, "Failed to read %s\n", wav_filename);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Offline model config
|
||||
SherpaOnnxOfflineModelConfig offline_model_config;
|
||||
memset(&offline_model_config, 0, sizeof(offline_model_config));
|
||||
|
||||
// set debug to 1 to view more logs
|
||||
offline_model_config.debug = 0;
|
||||
|
||||
offline_model_config.num_threads = 1;
|
||||
offline_model_config.provider = provider;
|
||||
offline_model_config.tokens = tokens_filename;
|
||||
offline_model_config.canary.encoder = encoder_filename;
|
||||
offline_model_config.canary.decoder = decoder_filename;
|
||||
|
||||
// so it output punctuations and cases
|
||||
offline_model_config.canary.use_pnc = 1;
|
||||
|
||||
offline_model_config.canary.src_lang = "de";
|
||||
|
||||
// since there is a German audio, you can set tgt_lang to en or de
|
||||
offline_model_config.canary.tgt_lang = "en";
|
||||
|
||||
// Recognizer config
|
||||
SherpaOnnxOfflineRecognizerConfig recognizer_config;
|
||||
memset(&recognizer_config, 0, sizeof(recognizer_config));
|
||||
recognizer_config.decoding_method = "greedy_search";
|
||||
recognizer_config.model_config = offline_model_config;
|
||||
|
||||
const SherpaOnnxOfflineRecognizer *recognizer =
|
||||
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
|
||||
|
||||
if (recognizer == NULL) {
|
||||
fprintf(stderr, "Please check your config!\n");
|
||||
|
||||
SherpaOnnxFreeWave(wave);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
const SherpaOnnxOfflineStream *stream =
|
||||
SherpaOnnxCreateOfflineStream(recognizer);
|
||||
|
||||
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
|
||||
wave->num_samples);
|
||||
SherpaOnnxDecodeOfflineStream(recognizer, stream);
|
||||
const SherpaOnnxOfflineRecognizerResult *result =
|
||||
SherpaOnnxGetOfflineStreamResult(stream);
|
||||
|
||||
fprintf(stderr, "Decoded text (English): %s\n", result->text);
|
||||
|
||||
SherpaOnnxDestroyOfflineRecognizerResult(result);
|
||||
SherpaOnnxDestroyOfflineStream(stream);
|
||||
|
||||
// now output German text
|
||||
recognizer_config.model_config.canary.tgt_lang = "de";
|
||||
SherpaOnnxOfflineRecognizerSetConfig(recognizer, &recognizer_config);
|
||||
|
||||
stream = SherpaOnnxCreateOfflineStream(recognizer);
|
||||
|
||||
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
|
||||
wave->num_samples);
|
||||
SherpaOnnxDecodeOfflineStream(recognizer, stream);
|
||||
result = SherpaOnnxGetOfflineStreamResult(stream);
|
||||
|
||||
fprintf(stderr, "Decoded text (German): %s\n", result->text);
|
||||
|
||||
SherpaOnnxDestroyOfflineRecognizerResult(result);
|
||||
SherpaOnnxDestroyOfflineStream(stream);
|
||||
|
||||
SherpaOnnxDestroyOfflineRecognizer(recognizer);
|
||||
SherpaOnnxFreeWave(wave);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -54,7 +54,7 @@ int32_t main() {
|
||||
"DEV_T0000000000.wav";
|
||||
const char *model_filename =
|
||||
"sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/"
|
||||
"ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx";
|
||||
"ctc-epoch-20-avg-1-chunk-16-left-128.onnx";
|
||||
const char *tokens_filename =
|
||||
"sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt";
|
||||
const char *provider = "cpu";
|
||||
|
||||
@@ -180,8 +180,8 @@ class BuildExtension(build_ext):
|
||||
if make_args == "" and system_make_args == "":
|
||||
print("for fast compilation, run:")
|
||||
print('export SHERPA_ONNX_MAKE_ARGS="-j"; python setup.py install')
|
||||
print('Setting make_args to "-j4"')
|
||||
make_args = "-j4"
|
||||
print('Setting make_args to "-j8"')
|
||||
make_args = "-j8"
|
||||
|
||||
if "-G Ninja" in cmake_args:
|
||||
build_cmd = f"""
|
||||
|
||||
@@ -19,9 +19,9 @@ if(NOT SHERPA_ONNX_ENABLE_GPU)
|
||||
endif()
|
||||
|
||||
|
||||
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
|
||||
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
|
||||
set(onnxruntime_HASH "SHA256=1261de176e8d9d4d2019f8fa8c732c6d11494f3c6e73168ab6d2cc0903f22551")
|
||||
set(onnxruntime_URL "ftp://ftp.4pd.io/pub/iluvatar/mr_v100/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
|
||||
set(onnxruntime_URL2 "ftp://ftp.4pd.io/pub/iluvatar/mr_v100/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
|
||||
set(onnxruntime_HASH "SHA256=8ae0625c2a9b110ff70768733c92e6585c875e16c50abd1015f0358dd41498ee")
|
||||
|
||||
# If you don't have access to the Internet,
|
||||
# please download onnxruntime to one of the following locations.
|
||||
@@ -70,7 +70,7 @@ add_library(onnxruntime SHARED IMPORTED)
|
||||
|
||||
set_target_properties(onnxruntime PROPERTIES
|
||||
IMPORTED_LOCATION ${location_onnxruntime}
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include/onnxruntime"
|
||||
)
|
||||
|
||||
find_library(location_onnxruntime_cuda_lib onnxruntime_providers_cuda
|
||||
|
||||
@@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
|
||||
add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
|
||||
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
|
||||
|
||||
add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
|
||||
target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_PORTAUDIO)
|
||||
add_executable(sense-voice-simulate-streaming-microphone-cxx-api
|
||||
./sense-voice-simulate-streaming-microphone-cxx-api.cc
|
||||
@@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
|
||||
portaudio_static
|
||||
)
|
||||
|
||||
add_executable(parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api
|
||||
./parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
|
||||
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
|
||||
)
|
||||
target_link_libraries(parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api
|
||||
sherpa-onnx-cxx-api
|
||||
portaudio_static
|
||||
)
|
||||
|
||||
add_executable(zipformer-ctc-simulate-streaming-microphone-cxx-api
|
||||
./zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
|
||||
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
|
||||
@@ -54,6 +66,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
|
||||
sherpa-onnx-cxx-api
|
||||
portaudio_static
|
||||
)
|
||||
|
||||
add_executable(zipformer-transducer-simulate-streaming-microphone-cxx-api
|
||||
./zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
|
||||
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
|
||||
)
|
||||
target_link_libraries(zipformer-transducer-simulate-streaming-microphone-cxx-api
|
||||
sherpa-onnx-cxx-api
|
||||
portaudio_static
|
||||
)
|
||||
endif()
|
||||
|
||||
if(SHERPA_ONNX_HAS_ALSA)
|
||||
|
||||
101
cxx-api-examples/nemo-canary-cxx-api.cc
Normal file
101
cxx-api-examples/nemo-canary-cxx-api.cc
Normal file
@@ -0,0 +1,101 @@
|
||||
// cxx-api-examples/nemo-canary-cxx-api.cc
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
//
|
||||
// This file demonstrates how to use NeMo Canary models with
|
||||
// sherpa-onnx's C++ API.
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
//
|
||||
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
|
||||
// for details
|
||||
|
||||
#include <chrono> // NOLINT
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/c-api/cxx-api.h"
|
||||
|
||||
int32_t main() {
|
||||
using namespace sherpa_onnx::cxx; // NOLINT
|
||||
OfflineRecognizerConfig config;
|
||||
|
||||
config.model_config.canary.encoder =
|
||||
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
|
||||
config.model_config.canary.decoder =
|
||||
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
|
||||
|
||||
// our input audio is German, so we set src_lang to "de"
|
||||
config.model_config.canary.src_lang = "de";
|
||||
|
||||
// we can set tgt_lang either to de or en in this specific case
|
||||
config.model_config.canary.tgt_lang = "en";
|
||||
config.model_config.tokens =
|
||||
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
|
||||
|
||||
config.model_config.num_threads = 1;
|
||||
|
||||
std::cout << "Loading model\n";
|
||||
OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
|
||||
if (!recognizer.Get()) {
|
||||
std::cerr << "Please check your config\n";
|
||||
return -1;
|
||||
}
|
||||
std::cout << "Loading model done\n";
|
||||
|
||||
std::string wave_filename =
|
||||
"./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
|
||||
|
||||
Wave wave = ReadWave(wave_filename);
|
||||
if (wave.samples.empty()) {
|
||||
std::cerr << "Failed to read: '" << wave_filename << "'\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::cout << "Start recognition\n";
|
||||
const auto begin = std::chrono::steady_clock::now();
|
||||
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
|
||||
wave.samples.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
|
||||
const auto end = std::chrono::steady_clock::now();
|
||||
const float elapsed_seconds =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
|
||||
.count() /
|
||||
1000.;
|
||||
float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
|
||||
float rtf = elapsed_seconds / duration;
|
||||
|
||||
std::cout << "text (English): " << result.text << "\n";
|
||||
printf("Number of threads: %d\n", config.model_config.num_threads);
|
||||
printf("Duration: %.3fs\n", duration);
|
||||
printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
|
||||
printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
|
||||
duration, rtf);
|
||||
|
||||
// now output text in German
|
||||
config.model_config.canary.tgt_lang = "de";
|
||||
recognizer.SetConfig(config);
|
||||
stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
|
||||
wave.samples.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
result = recognizer.GetResult(&stream);
|
||||
std::cout << "text (German): " << result.text << "\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,238 @@
|
||||
// cxx-api-examples/parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
//
|
||||
// This file demonstrates how to use parakeet-tdt with sherpa-onnx's C++ API
|
||||
// for streaming speech recognition from a microphone.
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
|
||||
// tar xvf sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
|
||||
// rm sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <chrono> // NOLINT
|
||||
#include <condition_variable> // NOLINT
|
||||
#include <iostream>
|
||||
#include <mutex> // NOLINT
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "portaudio.h" // NOLINT
|
||||
#include "sherpa-display.h" // NOLINT
|
||||
#include "sherpa-onnx/c-api/cxx-api.h"
|
||||
#include "sherpa-onnx/csrc/microphone.h"
|
||||
|
||||
std::queue<std::vector<float>> samples_queue;
|
||||
std::condition_variable condition_variable;
|
||||
std::mutex mutex;
|
||||
bool stop = false;
|
||||
|
||||
// SIGINT handler: sets the global `stop` flag, wakes one waiter on the global
// condition variable, and prints a notice to stderr.
static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fputs("\nCaught Ctrl + C. Exiting...\n", stderr);
}
|
||||
|
||||
static int32_t RecordCallback(const void *input_buffer,
|
||||
void * /*output_buffer*/,
|
||||
unsigned long frames_per_buffer, // NOLINT
|
||||
const PaStreamCallbackTimeInfo * /*time_info*/,
|
||||
PaStreamCallbackFlags /*status_flags*/,
|
||||
void * /*user_data*/) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
samples_queue.emplace(
|
||||
reinterpret_cast<const float *>(input_buffer),
|
||||
reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
|
||||
condition_variable.notify_one();
|
||||
|
||||
return stop ? paComplete : paContinue;
|
||||
}
|
||||
|
||||
// Builds a silero-VAD based voice activity detector from ./silero_vad.onnx
// for 16 kHz audio. Terminates the process with exit(-1) if the detector
// cannot be created.
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  sherpa_onnx::cxx::VadModelConfig config;
  config.sample_rate = 16000;
  config.debug = false;

  auto &silero = config.silero_vad;
  silero.model = "./silero_vad.onnx";
  silero.threshold = 0.25;
  silero.min_silence_duration = 0.25;
  silero.min_speech_duration = 0.25;
  silero.max_speech_duration = 5;

  // NOTE(review): the second argument is presumably the VAD buffer size in
  // seconds - confirm against cxx-api.h.
  auto vad = sherpa_onnx::cxx::VoiceActivityDetector::Create(config, 60);
  if (vad.Get()) {
    return vad;
  }

  std::cerr << "Failed to create VAD. Please check your config\n";
  exit(-1);
}
|
||||
|
||||
// Creates the offline recognizer for the NeMo parakeet-tdt_ctc Japanese int8
// model, using 2 threads. Prints progress to stdout and terminates the
// process with exit(-1) if the recognizer cannot be created.
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  sherpa_onnx::cxx::OfflineRecognizerConfig config;

  auto &model = config.model_config;
  model.num_threads = 2;
  model.debug = false;
  model.tokens =
      "./sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8/tokens.txt";
  model.nemo_ctc.model =
      "./sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8/model.int8.onnx";

  std::cout << "Loading model\n";
  auto recognizer = sherpa_onnx::cxx::OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";

  return recognizer;
}
|
||||
|
||||
int32_t main() {
|
||||
signal(SIGINT, Handler);
|
||||
|
||||
using namespace sherpa_onnx::cxx; // NOLINT
|
||||
|
||||
auto vad = CreateVad();
|
||||
auto recognizer = CreateOfflineRecognizer();
|
||||
|
||||
sherpa_onnx::Microphone mic;
|
||||
|
||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||
if (num_devices == 0) {
|
||||
std::cerr << " If you are using Linux, please try to modify "
|
||||
"./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||
if (pDeviceIndex) {
|
||||
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||
device_index = atoi(pDeviceIndex);
|
||||
}
|
||||
mic.PrintDevices(device_index);
|
||||
|
||||
float mic_sample_rate = 16000;
|
||||
const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||
if (sample_rate_str) {
|
||||
mic_sample_rate = atof(sample_rate_str);
|
||||
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||
}
|
||||
|
||||
float sample_rate = 16000;
|
||||
LinearResampler resampler;
|
||||
if (mic_sample_rate != sample_rate) {
|
||||
float min_freq = std::min(mic_sample_rate, sample_rate);
|
||||
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
||||
|
||||
int32_t lowpass_filter_width = 6;
|
||||
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
|
||||
lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t window_size = 512; // samples, please don't change
|
||||
|
||||
int32_t offset = 0;
|
||||
std::vector<float> buffer;
|
||||
bool speech_started = false;
|
||||
|
||||
auto started_time = std::chrono::steady_clock::now();
|
||||
|
||||
SherpaDisplay display;
|
||||
|
||||
std::cout << "Started! Please speak\n";
|
||||
|
||||
while (!stop) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
while (samples_queue.empty() && !stop) {
|
||||
condition_variable.wait(lock);
|
||||
}
|
||||
|
||||
const auto &s = samples_queue.front();
|
||||
if (!resampler.Get()) {
|
||||
buffer.insert(buffer.end(), s.begin(), s.end());
|
||||
} else {
|
||||
auto resampled = resampler.Resample(s.data(), s.size(), false);
|
||||
buffer.insert(buffer.end(), resampled.begin(), resampled.end());
|
||||
}
|
||||
|
||||
samples_queue.pop();
|
||||
}
|
||||
|
||||
for (; offset + window_size < buffer.size(); offset += window_size) {
|
||||
vad.AcceptWaveform(buffer.data() + offset, window_size);
|
||||
if (!speech_started && vad.IsDetected()) {
|
||||
speech_started = true;
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
}
|
||||
if (!speech_started) {
|
||||
if (buffer.size() > 10 * window_size) {
|
||||
offset -= buffer.size() - 10 * window_size;
|
||||
buffer = {buffer.end() - 10 * window_size, buffer.end()};
|
||||
}
|
||||
}
|
||||
|
||||
auto current_time = std::chrono::steady_clock::now();
|
||||
const float elapsed_seconds =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
|
||||
started_time)
|
||||
.count() /
|
||||
1000.;
|
||||
|
||||
if (speech_started && elapsed_seconds > 0.2) {
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
display.UpdateText(result.text);
|
||||
display.Display();
|
||||
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
while (!vad.IsEmpty()) {
|
||||
auto segment = vad.Front();
|
||||
|
||||
vad.Pop();
|
||||
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(sample_rate, segment.samples.data(),
|
||||
segment.samples.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
|
||||
display.UpdateText(result.text);
|
||||
display.FinalizeCurrentSentence();
|
||||
display.Display();
|
||||
|
||||
buffer.clear();
|
||||
offset = 0;
|
||||
speech_started = false;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -136,11 +136,7 @@ int32_t main() {
|
||||
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||
mic_sample_rate = atof(sample_rate_str);
|
||||
}
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr) == false) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
float sample_rate = 16000;
|
||||
LinearResampler resampler;
|
||||
if (mic_sample_rate != sample_rate) {
|
||||
@@ -152,6 +148,12 @@ int32_t main() {
|
||||
lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t window_size = 512; // samples, please don't change
|
||||
|
||||
int32_t offset = 0;
|
||||
|
||||
@@ -142,8 +142,8 @@ int32_t main() {
|
||||
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
|
||||
lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr) == false) {
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -140,8 +140,8 @@ int32_t main() {
|
||||
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
|
||||
lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr) == false) {
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
// cxx-api-examples/zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
//
|
||||
// This file demonstrates how to use Zipformer transducer with sherpa-onnx's C++ API
|
||||
// for streaming speech recognition from a microphone.
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
|
||||
// tar xvf sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
|
||||
// rm sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <chrono> // NOLINT
|
||||
#include <condition_variable> // NOLINT
|
||||
#include <iostream>
|
||||
#include <mutex> // NOLINT
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "portaudio.h" // NOLINT
|
||||
#include "sherpa-display.h" // NOLINT
|
||||
#include "sherpa-onnx/c-api/cxx-api.h"
|
||||
#include "sherpa-onnx/csrc/microphone.h"
|
||||
|
||||
std::queue<std::vector<float>> samples_queue;
|
||||
std::condition_variable condition_variable;
|
||||
std::mutex mutex;
|
||||
bool stop = false;
|
||||
|
||||
// SIGINT handler: sets the global `stop` flag, wakes one waiter on the global
// condition variable, and prints a notice to stderr.
static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fputs("\nCaught Ctrl + C. Exiting...\n", stderr);
}
|
||||
|
||||
static int32_t RecordCallback(const void *input_buffer,
|
||||
void * /*output_buffer*/,
|
||||
unsigned long frames_per_buffer, // NOLINT
|
||||
const PaStreamCallbackTimeInfo * /*time_info*/,
|
||||
PaStreamCallbackFlags /*status_flags*/,
|
||||
void * /*user_data*/) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
samples_queue.emplace(
|
||||
reinterpret_cast<const float *>(input_buffer),
|
||||
reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
|
||||
condition_variable.notify_one();
|
||||
|
||||
return stop ? paComplete : paContinue;
|
||||
}
|
||||
|
||||
// Builds a silero-VAD based voice activity detector from ./silero_vad.onnx
// for 16 kHz audio. Terminates the process with exit(-1) if the detector
// cannot be created.
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  sherpa_onnx::cxx::VadModelConfig config;
  config.sample_rate = 16000;
  config.debug = false;

  auto &silero = config.silero_vad;
  silero.model = "./silero_vad.onnx";
  silero.threshold = 0.5;
  silero.min_silence_duration = 0.1;
  silero.min_speech_duration = 0.25;
  silero.max_speech_duration = 8;

  // NOTE(review): the second argument is presumably the VAD buffer size in
  // seconds - confirm against cxx-api.h.
  auto vad = sherpa_onnx::cxx::VoiceActivityDetector::Create(config, 20);
  if (vad.Get()) {
    return vad;
  }

  std::cerr << "Failed to create VAD. Please check your config\n";
  exit(-1);
}
|
||||
|
||||
// Creates the offline recognizer for the Japanese ReazonSpeech Zipformer
// transducer model (int8 encoder and joiner, non-quantized decoder), using
// 2 threads. Prints progress to stdout and terminates the process with
// exit(-1) if the recognizer cannot be created.
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  sherpa_onnx::cxx::OfflineRecognizerConfig config;

  auto &model = config.model_config;
  model.num_threads = 2;
  model.debug = false;
  model.tokens =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt";

  auto &transducer = model.transducer;
  transducer.encoder =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "encoder-epoch-99-avg-1.int8.onnx";
  transducer.decoder =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "decoder-epoch-99-avg-1.onnx";
  transducer.joiner =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "joiner-epoch-99-avg-1.int8.onnx";

  std::cout << "Loading model\n";
  auto recognizer = sherpa_onnx::cxx::OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";

  return recognizer;
}
|
||||
|
||||
int32_t main() {
|
||||
signal(SIGINT, Handler);
|
||||
|
||||
using namespace sherpa_onnx::cxx; // NOLINT
|
||||
|
||||
auto vad = CreateVad();
|
||||
auto recognizer = CreateOfflineRecognizer();
|
||||
|
||||
sherpa_onnx::Microphone mic;
|
||||
|
||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||
if (num_devices == 0) {
|
||||
std::cerr << " If you are using Linux, please try "
|
||||
"./build/bin/zipformer-ctc-simulate-streaming-alsa-cxx-api\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||
if (pDeviceIndex) {
|
||||
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||
device_index = atoi(pDeviceIndex);
|
||||
}
|
||||
mic.PrintDevices(device_index);
|
||||
|
||||
float mic_sample_rate = 16000;
|
||||
const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||
if (sample_rate_str) {
|
||||
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||
mic_sample_rate = atof(sample_rate_str);
|
||||
}
|
||||
float sample_rate = 16000;
|
||||
LinearResampler resampler;
|
||||
if (mic_sample_rate != sample_rate) {
|
||||
float min_freq = std::min(mic_sample_rate, sample_rate);
|
||||
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
||||
|
||||
int32_t lowpass_filter_width = 6;
|
||||
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
|
||||
lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t window_size = 512; // samples, please don't change
|
||||
|
||||
int32_t offset = 0;
|
||||
std::vector<float> buffer;
|
||||
bool speech_started = false;
|
||||
|
||||
auto started_time = std::chrono::steady_clock::now();
|
||||
|
||||
SherpaDisplay display;
|
||||
|
||||
std::cout << "Started! Please speak\n";
|
||||
|
||||
while (!stop) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
while (samples_queue.empty() && !stop) {
|
||||
condition_variable.wait(lock);
|
||||
}
|
||||
|
||||
const auto &s = samples_queue.front();
|
||||
if (!resampler.Get()) {
|
||||
buffer.insert(buffer.end(), s.begin(), s.end());
|
||||
} else {
|
||||
auto resampled = resampler.Resample(s.data(), s.size(), false);
|
||||
buffer.insert(buffer.end(), resampled.begin(), resampled.end());
|
||||
}
|
||||
|
||||
samples_queue.pop();
|
||||
}
|
||||
|
||||
for (; offset + window_size < buffer.size(); offset += window_size) {
|
||||
vad.AcceptWaveform(buffer.data() + offset, window_size);
|
||||
if (!speech_started && vad.IsDetected()) {
|
||||
speech_started = true;
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
}
|
||||
if (!speech_started) {
|
||||
if (buffer.size() > 10 * window_size) {
|
||||
offset -= buffer.size() - 10 * window_size;
|
||||
buffer = {buffer.end() - 10 * window_size, buffer.end()};
|
||||
}
|
||||
}
|
||||
|
||||
auto current_time = std::chrono::steady_clock::now();
|
||||
const float elapsed_seconds =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
|
||||
started_time)
|
||||
.count() /
|
||||
1000.;
|
||||
|
||||
if (speech_started && elapsed_seconds > 0.2) {
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
display.UpdateText(result.text);
|
||||
display.Display();
|
||||
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
while (!vad.IsEmpty()) {
|
||||
auto segment = vad.Front();
|
||||
|
||||
vad.Pop();
|
||||
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(sample_rate, segment.samples.data(),
|
||||
segment.samples.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
|
||||
display.UpdateText(result.text);
|
||||
display.FinalizeCurrentSentence();
|
||||
display.Display();
|
||||
|
||||
buffer.clear();
|
||||
offset = 0;
|
||||
speech_started = false;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -9,7 +9,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
# sherpa_onnx:
|
||||
# path: ../../flutter/sherpa_onnx
|
||||
path: ^1.9.0
|
||||
|
||||
84
dart-api-examples/non-streaming-asr/bin/nemo-canary.dart
Normal file
84
dart-api-examples/non-streaming-asr/bin/nemo-canary.dart
Normal file
@@ -0,0 +1,84 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:args/args.dart';
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
|
||||
import './init.dart';
|
||||
|
||||
/// Transcribes a wave file with a NeMo Canary model.
///
/// Required command-line options: `--encoder`, `--decoder`, `--src-lang`,
/// `--tgt-lang`, `--tokens` and `--input-wav`. When any of them is missing the
/// usage text is printed and the process exits with status 1.
///
/// The file is first decoded into `--tgt-lang`. If that language is not
/// English, the recognizer is reconfigured at runtime via `setConfig` and the
/// same audio is decoded a second time into English.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the NeMo Canary encoder model')
    ..addOption('decoder', help: 'Path to the NeMo Canary decoder model')
    ..addOption('src-lang', help: 'Language of the input audio')
    ..addOption('tgt-lang', help: 'Language of the recognition result')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['src-lang'] == null ||
      res['tgt-lang'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final srcLang = res['src-lang'] as String;
  final tgtLang = res['tgt-lang'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final canary = sherpa_onnx.OfflineCanaryModelConfig(
      encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    canary: canary,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print('Result in $tgtLang: ${result.text}');

  stream.free();

  // Example to change the target language to English ('en') at runtime
  if (tgtLang != 'en') {
    var json = config.toJson();

    // The casts already yield non-nullable maps, so no `!` is needed here.
    ((json['model'] as Map<String, dynamic>)['canary']
        as Map<String, dynamic>)['tgtLang'] = 'en';

    config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json);
    recognizer.setConfig(config);

    final stream = recognizer.createStream();

    stream.acceptWaveform(
        samples: waveData.samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    print('Result in English: ${result.text}');
    stream.free();
  }

  recognizer.free();
}
|
||||
@@ -10,7 +10,7 @@ environment:
|
||||
|
||||
# Add regular dependencies here.
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
33
dart-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
33
dart-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env bash
#
# Runs the Dart NeMo Canary example: fetches the int8 model when it is not
# already unpacked, then transcribes the bundled English and German test
# waves into several target languages.

set -ex

dart pub get

# Download and unpack the model only when the encoder file is missing.
if ! [ -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

# Usage: run_canary <src-lang> <input-wav> <tgt-lang>...
# Decodes <input-wav> once for every listed target language.
run_canary() {
  local src_lang=$1
  local input_wav=$2
  shift 2

  local tgt_lang
  for tgt_lang in "$@"; do
    dart run \
      ./bin/nemo-canary.dart \
      --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
      --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
      --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
      --src-lang $src_lang \
      --tgt-lang $tgt_lang \
      --input-wav $input_wav
  done
}

run_canary en ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav en de es fr
run_canary de ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav en de
|
||||
@@ -8,7 +8,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
# sherpa_onnx:
|
||||
# path: ../../flutter/sherpa_onnx
|
||||
path: ^1.9.0
|
||||
|
||||
@@ -9,7 +9,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ environment:
|
||||
|
||||
# Add regular dependencies here.
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
# sherpa_onnx:
|
||||
# path: ../../flutter/sherpa_onnx
|
||||
path: ^1.9.0
|
||||
|
||||
@@ -11,7 +11,7 @@ environment:
|
||||
|
||||
# Add regular dependencies here.
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ environment:
|
||||
|
||||
# Add regular dependencies here.
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
path: ^1.9.0
|
||||
args: ^2.5.0
|
||||
|
||||
|
||||
44
dotnet-examples/non-streaming-canary-decode-files/Program.cs
Normal file
44
dotnet-examples/non-streaming-canary-decode-files/Program.cs
Normal file
@@ -0,0 +1,44 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to use a NeMo Canary model for speech recognition.
|
||||
//
|
||||
// You can find the model doc at
|
||||
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
|
||||
using SherpaOnnx;
|
||||
|
||||
class NonStreamingAsrCanary
{
  // Feeds the whole wave held by `reader` into a fresh stream of `recognizer`,
  // decodes it and returns the recognized text.
  static string Transcribe(OfflineRecognizer recognizer, WaveReader reader)
  {
    var stream = recognizer.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    recognizer.Decode(stream);
    return stream.Result.Text;
  }

  // Decodes the bundled English test wave twice: first with English output,
  // then, after switching the target language via SetConfig, with German
  // output.
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
    config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
    config.ModelConfig.Canary.SrcLang = "en";
    config.ModelConfig.Canary.TgtLang = "en";
    config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
    config.ModelConfig.Debug = 0;

    var recognizer = new OfflineRecognizer(config);
    var reader = new WaveReader("./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav");

    Console.WriteLine("Text (English): {0}", Transcribe(recognizer, reader));

    // Now output text in German
    config.ModelConfig.Canary.TgtLang = "de";
    recognizer.SetConfig(config);

    Console.WriteLine("Text (German): {0}", Transcribe(recognizer, reader));
  }
}
|
||||
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<RootNamespace>non_streaming_canary_decode_files</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
11
dotnet-examples/non-streaming-canary-decode-files/run.sh
Executable file
11
dotnet-examples/non-streaming-canary-decode-files/run.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env bash
#
# Runs the .NET NeMo Canary example, downloading the int8 model first when it
# has not been unpacked into the current directory yet.

set -ex

# Skip the download when the encoder file is already present.
if ! test -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

dotnet run
|
||||
@@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn",
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -117,6 +119,10 @@ Global
|
||||
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
@@ -5,7 +5,7 @@ description: >
|
||||
|
||||
publish_to: 'none'
|
||||
|
||||
version: 1.12.4
|
||||
version: 1.12.5
|
||||
|
||||
topics:
|
||||
- speech-recognition
|
||||
@@ -31,7 +31,7 @@ dependencies:
|
||||
record: ^5.1.0
|
||||
url_launcher: ^6.2.6
|
||||
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
# sherpa_onnx:
|
||||
# path: ../../flutter/sherpa_onnx
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ description: >
|
||||
|
||||
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
|
||||
|
||||
version: 1.12.4
|
||||
version: 1.12.5
|
||||
|
||||
environment:
|
||||
sdk: ">=2.17.0 <4.0.0"
|
||||
@@ -18,7 +18,7 @@ dependencies:
|
||||
cupertino_icons: ^1.0.6
|
||||
path_provider: ^2.1.3
|
||||
path: ^1.9.0
|
||||
sherpa_onnx: ^1.12.4
|
||||
sherpa_onnx: ^1.12.5
|
||||
# sherpa_onnx:
|
||||
# path: ../../flutter/sherpa_onnx
|
||||
url_launcher: 6.2.6
|
||||
|
||||
@@ -163,6 +163,44 @@ class OfflineWhisperModelConfig {
|
||||
final int tailPaddings;
|
||||
}
|
||||
|
||||
/// Settings for a NeMo Canary offline (non-streaming) model.
///
/// [encoder] and [decoder] are model file paths, [srcLang] is the language of
/// the input audio and [tgtLang] the language of the produced text. [usePnc]
/// is forwarded to the native `usePnc` field as 1 or 0.
class OfflineCanaryModelConfig {
  final String encoder;
  final String decoder;
  final String srcLang;
  final String tgtLang;
  final bool usePnc;

  const OfflineCanaryModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.srcLang = 'en',
    this.tgtLang = 'en',
    this.usePnc = true,
  });

  /// Builds a config from [json]; absent or null entries fall back to the
  /// constructor defaults.
  factory OfflineCanaryModelConfig.fromJson(Map<String, dynamic> json) {
    final encoderPath = json['encoder'] as String?;
    final decoderPath = json['decoder'] as String?;
    final sourceLanguage = json['srcLang'] as String?;
    final targetLanguage = json['tgtLang'] as String?;
    final punctuate = json['usePnc'] as bool?;

    return OfflineCanaryModelConfig(
      encoder: encoderPath ?? '',
      decoder: decoderPath ?? '',
      srcLang: sourceLanguage ?? 'en',
      tgtLang: targetLanguage ?? 'en',
      usePnc: punctuate ?? true,
    );
  }

  /// Returns a map using the same keys that [OfflineCanaryModelConfig.fromJson]
  /// reads.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'encoder': encoder,
      'decoder': decoder,
      'srcLang': srcLang,
      'tgtLang': tgtLang,
      'usePnc': usePnc,
    };
  }

  @override
  String toString() {
    return 'OfflineCanaryModelConfig(encoder: $encoder, decoder: $decoder, srcLang: $srcLang, tgtLang: $tgtLang, usePnc: $usePnc)';
  }
}
|
||||
|
||||
class OfflineFireRedAsrModelConfig {
|
||||
const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''});
|
||||
|
||||
@@ -310,6 +348,7 @@ class OfflineModelConfig {
|
||||
this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
|
||||
this.dolphin = const OfflineDolphinModelConfig(),
|
||||
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
|
||||
this.canary = const OfflineCanaryModelConfig(),
|
||||
required this.tokens,
|
||||
this.numThreads = 1,
|
||||
this.debug = true,
|
||||
@@ -362,6 +401,10 @@ class OfflineModelConfig {
|
||||
? OfflineZipformerCtcModelConfig.fromJson(
|
||||
json['zipformerCtc'] as Map<String, dynamic>)
|
||||
: const OfflineZipformerCtcModelConfig(),
|
||||
canary: json['canary'] != null
|
||||
? OfflineCanaryModelConfig.fromJson(
|
||||
json['canary'] as Map<String, dynamic>)
|
||||
: const OfflineCanaryModelConfig(),
|
||||
tokens: json['tokens'] as String,
|
||||
numThreads: json['numThreads'] as int? ?? 1,
|
||||
debug: json['debug'] as bool? ?? true,
|
||||
@@ -375,7 +418,7 @@ class OfflineModelConfig {
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
|
||||
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
|
||||
}
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
@@ -389,6 +432,7 @@ class OfflineModelConfig {
|
||||
'fireRedAsr': fireRedAsr.toJson(),
|
||||
'dolphin': dolphin.toJson(),
|
||||
'zipformerCtc': zipformerCtc.toJson(),
|
||||
'canary': canary.toJson(),
|
||||
'tokens': tokens,
|
||||
'numThreads': numThreads,
|
||||
'debug': debug,
|
||||
@@ -409,6 +453,7 @@ class OfflineModelConfig {
|
||||
final OfflineFireRedAsrModelConfig fireRedAsr;
|
||||
final OfflineDolphinModelConfig dolphin;
|
||||
final OfflineZipformerCtcModelConfig zipformerCtc;
|
||||
final OfflineCanaryModelConfig canary;
|
||||
|
||||
final String tokens;
|
||||
final int numThreads;
|
||||
@@ -549,7 +594,28 @@ class OfflineRecognizer {
|
||||
|
||||
/// The user is responsible to call the OfflineRecognizer.free()
|
||||
/// method of the returned instance to avoid memory leak.
|
||||
|
||||
factory OfflineRecognizer(OfflineRecognizerConfig config) {
  // The native config is only needed while the create call runs, so it is
  // released immediately afterwards.
  final nativeConfig = convertConfig(config);

  // Fall back to a null pointer when the binding has not been resolved.
  final create = SherpaOnnxBindings.createOfflineRecognizer;
  final handle = create == null ? nullptr : create(nativeConfig);

  freeConfig(nativeConfig);

  return OfflineRecognizer._(ptr: handle, config: config);
}
|
||||
|
||||
void setConfig(OfflineRecognizerConfig config) {
  // Convert to a temporary native config, hand it to the native recognizer
  // (a no-op when the binding has not been resolved), then release it.
  final nativeConfig = convertConfig(config);

  final apply = SherpaOnnxBindings.offlineRecognizerSetConfig;
  if (apply != null) {
    apply(ptr, nativeConfig);
  }

  freeConfig(nativeConfig);
  // we don't update this.config
}
|
||||
|
||||
static Pointer<SherpaOnnxOfflineRecognizerConfig> convertConfig(
|
||||
OfflineRecognizerConfig config) {
|
||||
final c = calloc<SherpaOnnxOfflineRecognizerConfig>();
|
||||
|
||||
c.ref.feat.sampleRate = config.feat.sampleRate;
|
||||
@@ -609,6 +675,12 @@ class OfflineRecognizer {
|
||||
c.ref.model.zipformerCtc.model =
|
||||
config.model.zipformerCtc.model.toNativeUtf8();
|
||||
|
||||
c.ref.model.canary.encoder = config.model.canary.encoder.toNativeUtf8();
|
||||
c.ref.model.canary.decoder = config.model.canary.decoder.toNativeUtf8();
|
||||
c.ref.model.canary.srcLang = config.model.canary.srcLang.toNativeUtf8();
|
||||
c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
|
||||
c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;
|
||||
|
||||
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
|
||||
|
||||
c.ref.model.numThreads = config.model.numThreads;
|
||||
@@ -637,8 +709,10 @@ class OfflineRecognizer {
|
||||
c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
|
||||
c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();
|
||||
|
||||
final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;
|
||||
return c;
|
||||
}
|
||||
|
||||
static void freeConfig(Pointer<SherpaOnnxOfflineRecognizerConfig> c) {
|
||||
calloc.free(c.ref.hr.dictDir);
|
||||
calloc.free(c.ref.hr.lexicon);
|
||||
calloc.free(c.ref.hr.ruleFsts);
|
||||
@@ -653,6 +727,10 @@ class OfflineRecognizer {
|
||||
calloc.free(c.ref.model.modelType);
|
||||
calloc.free(c.ref.model.provider);
|
||||
calloc.free(c.ref.model.tokens);
|
||||
calloc.free(c.ref.model.canary.tgtLang);
|
||||
calloc.free(c.ref.model.canary.srcLang);
|
||||
calloc.free(c.ref.model.canary.decoder);
|
||||
calloc.free(c.ref.model.canary.encoder);
|
||||
calloc.free(c.ref.model.zipformerCtc.model);
|
||||
calloc.free(c.ref.model.dolphin.model);
|
||||
calloc.free(c.ref.model.fireRedAsr.decoder);
|
||||
@@ -674,8 +752,6 @@ class OfflineRecognizer {
|
||||
calloc.free(c.ref.model.transducer.decoder);
|
||||
calloc.free(c.ref.model.transducer.joiner);
|
||||
calloc.free(c);
|
||||
|
||||
return OfflineRecognizer._(ptr: ptr, config: config);
|
||||
}
|
||||
|
||||
/// The user has to invoke stream.free() on the returned instance
|
||||
|
||||
@@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
|
||||
external int tailPaddings;
|
||||
}
|
||||
|
||||
/// FFI view of the native NeMo Canary model configuration.
///
/// The string members are native UTF-8 pointers; `OfflineRecognizer`
/// fills them with `toNativeUtf8()` and releases them with `calloc.free`.
/// NOTE(review): as with any `Struct`, member order is presumably required to
/// match the native declaration - confirm against the C API header before
/// reordering.
final class SherpaOnnxOfflineCanaryModelConfig extends Struct {
|
||||
external Pointer<Utf8> encoder;
|
||||
external Pointer<Utf8> decoder;
|
||||
external Pointer<Utf8> srcLang;
|
||||
external Pointer<Utf8> tgtLang;
|
||||
|
||||
// Boolean flag stored as a 32-bit integer; the Dart wrapper writes 1 or 0.
@Int32()
|
||||
external int usePnc;
|
||||
}
|
||||
|
||||
final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
|
||||
external Pointer<Utf8> preprocessor;
|
||||
external Pointer<Utf8> encoder;
|
||||
@@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
|
||||
external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
|
||||
external SherpaOnnxOfflineDolphinModelConfig dolphin;
|
||||
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
|
||||
external SherpaOnnxOfflineCanaryModelConfig canary;
|
||||
}
|
||||
|
||||
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
|
||||
@@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
|
||||
|
||||
typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative;
|
||||
|
||||
/// Native (C-side) signature used when looking up
/// `SherpaOnnxOfflineRecognizerSetConfig`: takes the recognizer handle and a
/// pointer to the new configuration, returns nothing.
typedef OfflineRecognizerSetConfigNative = Void Function(
|
||||
Pointer<SherpaOnnxOfflineRecognizer>,
|
||||
Pointer<SherpaOnnxOfflineRecognizerConfig>);
|
||||
|
||||
/// Dart-side counterpart of [OfflineRecognizerSetConfigNative]; this is the
/// type of `SherpaOnnxBindings.offlineRecognizerSetConfig`.
typedef OfflineRecognizerSetConfig = void Function(
|
||||
Pointer<SherpaOnnxOfflineRecognizer>,
|
||||
Pointer<SherpaOnnxOfflineRecognizerConfig>);
|
||||
|
||||
typedef DestroyOfflineRecognizerNative = Void Function(
|
||||
Pointer<SherpaOnnxOfflineRecognizer>);
|
||||
|
||||
@@ -1341,6 +1360,7 @@ class SherpaOnnxBindings {
|
||||
|
||||
static CreateOfflineRecognizer? createOfflineRecognizer;
|
||||
static DestroyOfflineRecognizer? destroyOfflineRecognizer;
|
||||
static OfflineRecognizerSetConfig? offlineRecognizerSetConfig;
|
||||
static CreateOfflineStream? createOfflineStream;
|
||||
static DestroyOfflineStream? destroyOfflineStream;
|
||||
static AcceptWaveformOffline? acceptWaveformOffline;
|
||||
@@ -1741,6 +1761,11 @@ class SherpaOnnxBindings {
|
||||
'SherpaOnnxDestroyOfflineRecognizer')
|
||||
.asFunction();
|
||||
|
||||
offlineRecognizerSetConfig ??= dynamicLibrary
|
||||
.lookup<NativeFunction<OfflineRecognizerSetConfigNative>>(
|
||||
'SherpaOnnxOfflineRecognizerSetConfig')
|
||||
.asFunction();
|
||||
|
||||
createOfflineStream ??= dynamicLibrary
|
||||
.lookup<NativeFunction<CreateOfflineStreamNative>>(
|
||||
'SherpaOnnxCreateOfflineStream')
|
||||
|
||||
@@ -17,7 +17,7 @@ topics:
|
||||
- voice-activity-detection
|
||||
|
||||
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
|
||||
version: 1.12.4
|
||||
version: 1.12.5
|
||||
|
||||
homepage: https://github.com/k2-fsa/sherpa-onnx
|
||||
|
||||
@@ -30,23 +30,23 @@ dependencies:
|
||||
flutter:
|
||||
sdk: flutter
|
||||
|
||||
sherpa_onnx_android: ^1.12.4
|
||||
sherpa_onnx_android: ^1.12.5
|
||||
# sherpa_onnx_android:
|
||||
# path: ../sherpa_onnx_android
|
||||
|
||||
sherpa_onnx_macos: ^1.12.4
|
||||
sherpa_onnx_macos: ^1.12.5
|
||||
# sherpa_onnx_macos:
|
||||
# path: ../sherpa_onnx_macos
|
||||
|
||||
sherpa_onnx_linux: ^1.12.4
|
||||
sherpa_onnx_linux: ^1.12.5
|
||||
# sherpa_onnx_linux:
|
||||
# path: ../sherpa_onnx_linux
|
||||
|
||||
sherpa_onnx_windows: ^1.12.4
|
||||
sherpa_onnx_windows: ^1.12.5
|
||||
# sherpa_onnx_windows:
|
||||
# path: ../sherpa_onnx_windows
|
||||
|
||||
sherpa_onnx_ios: ^1.12.4
|
||||
sherpa_onnx_ios: ^1.12.5
|
||||
# sherpa_onnx_ios:
|
||||
# path: ../sherpa_onnx_ios
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
|
||||
Pod::Spec.new do |s|
|
||||
s.name = 'sherpa_onnx_ios'
|
||||
s.version = '1.12.4'
|
||||
s.version = '1.12.5'
|
||||
s.summary = 'A new Flutter FFI plugin project.'
|
||||
s.description = <<-DESC
|
||||
A new Flutter FFI plugin project.
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#
|
||||
Pod::Spec.new do |s|
|
||||
s.name = 'sherpa_onnx_macos'
|
||||
s.version = '1.12.4'
|
||||
s.version = '1.12.5'
|
||||
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
|
||||
s.description = <<-DESC
|
||||
sherpa-onnx Flutter FFI plugin project.
|
||||
|
||||
17
go-api-examples/non-streaming-canary-decode-files/go.mod
Normal file
17
go-api-examples/non-streaming-canary-decode-files/go.mod
Normal file
@@ -0,0 +1,17 @@
|
||||
module non-streaming-canary-decode-files
|
||||
|
||||
go 1.17
|
||||
|
||||
require (
|
||||
github.com/k2-fsa/sherpa-onnx-go v1.12.4
|
||||
github.com/spf13/pflag v1.0.6
|
||||
github.com/youpy/go-wav v0.3.2
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/k2-fsa/sherpa-onnx-go-linux v1.12.4 // indirect
|
||||
github.com/k2-fsa/sherpa-onnx-go-macos v1.12.4 // indirect
|
||||
github.com/k2-fsa/sherpa-onnx-go-windows v1.12.4 // indirect
|
||||
github.com/youpy/go-riff v0.1.0 // indirect
|
||||
github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect
|
||||
)
|
||||
113
go-api-examples/non-streaming-canary-decode-files/main.go
Normal file
113
go-api-examples/non-streaming-canary-decode-files/main.go
Normal file
@@ -0,0 +1,113 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
|
||||
"github.com/youpy/go-wav"
|
||||
)
|
||||
|
||||
func main() {
|
||||
log.SetFlags(log.LstdFlags | log.Lmicroseconds)
|
||||
|
||||
config := sherpa.OfflineRecognizerConfig{}
|
||||
|
||||
config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"
|
||||
config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"
|
||||
config.ModelConfig.Canary.SrcLang = "en"
|
||||
config.ModelConfig.Canary.TgtLang = "en"
|
||||
config.ModelConfig.Canary.UsePnc = 1
|
||||
config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"
|
||||
|
||||
waveFilename := "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"
|
||||
|
||||
samples, sampleRate := readWave(waveFilename)
|
||||
|
||||
log.Println("Initializing recognizer (may take several seconds)")
|
||||
recognizer := sherpa.NewOfflineRecognizer(&config)
|
||||
log.Println("Recognizer created!")
|
||||
defer sherpa.DeleteOfflineRecognizer(recognizer)
|
||||
|
||||
log.Println("Start decoding!")
|
||||
stream := sherpa.NewOfflineStream(recognizer)
|
||||
defer sherpa.DeleteOfflineStream(stream)
|
||||
|
||||
stream.AcceptWaveform(sampleRate, samples)
|
||||
|
||||
recognizer.Decode(stream)
|
||||
log.Println("Decoding done!")
|
||||
result := stream.GetResult()
|
||||
|
||||
log.Println("Text in English: " + strings.ToLower(result.Text))
|
||||
|
||||
s := sherpa.NewOfflineStream(recognizer)
|
||||
defer sherpa.DeleteOfflineStream(s)
|
||||
|
||||
s.AcceptWaveform(sampleRate, samples)
|
||||
|
||||
config.ModelConfig.Canary.TgtLang = "de"
|
||||
recognizer.SetConfig(&config)
|
||||
recognizer.Decode(s)
|
||||
result = s.GetResult()
|
||||
|
||||
log.Println("Text in German: " + strings.ToLower(result.Text))
|
||||
}
|
||||
|
||||
func readWave(filename string) (samples []float32, sampleRate int) {
|
||||
file, _ := os.Open(filename)
|
||||
defer file.Close()
|
||||
|
||||
reader := wav.NewReader(file)
|
||||
format, err := reader.Format()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to read wave format")
|
||||
}
|
||||
|
||||
if format.AudioFormat != 1 {
|
||||
log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
|
||||
}
|
||||
|
||||
if format.NumChannels != 1 {
|
||||
log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
|
||||
}
|
||||
|
||||
if format.BitsPerSample != 16 {
|
||||
log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
|
||||
}
|
||||
|
||||
reader.Duration() // so that it initializes reader.Size
|
||||
|
||||
buf := make([]byte, reader.Size)
|
||||
n, err := reader.Read(buf)
|
||||
if n != int(reader.Size) {
|
||||
log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
|
||||
}
|
||||
|
||||
samples = samplesInt16ToFloat(buf)
|
||||
sampleRate = int(format.SampleRate)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// samplesInt16ToFloat converts little-endian signed 16-bit PCM bytes into
// float32 samples by dividing every value by 32768.
//
// The result holds len(inSamples)/2 samples; a trailing odd byte is ignored.
// A decoding failure aborts the program via log.Fatal.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)
	if numSamples == 0 {
		return outSamples
	}

	// Decode every sample with a single binary.Read instead of creating a
	// reader and issuing a separate read for each 2-byte sample.
	pcm := make([]int16, numSamples)
	if err := binary.Read(bytes.NewReader(inSamples), binary.LittleEndian, pcm); err != nil {
		log.Fatal("Failed to parse 16-bit sample")
	}

	for i, v := range pcm {
		outSamples[i] = float32(v) / 32768
	}

	return outSamples
}
|
||||
13
go-api-examples/non-streaming-canary-decode-files/run.sh
Executable file
13
go-api-examples/non-streaming-canary-decode-files/run.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
#
# Builds and runs the Go NeMo Canary example, downloading the int8 model first
# when it has not been unpacked into the current directory yet.

set -ex

# Skip the download when the encoder file is already present.
if ! test -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

go mod tidy
go build
./non-streaming-canary-decode-files
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Use these variables when you tailor your ArkTS code. They must be of the const type.
|
||||
*/
|
||||
export const HAR_VERSION = '1.12.4';
|
||||
export const HAR_VERSION = '1.12.5';
|
||||
export const BUILD_MODE_NAME = 'debug';
|
||||
export const DEBUG = true;
|
||||
export const TARGET_NAME = 'default';
|
||||
|
||||
@@ -7,6 +7,7 @@ export { Samples,
|
||||
OfflineStream,
|
||||
FeatureConfig,
|
||||
HomophoneReplacerConfig,
|
||||
OfflineCanaryModelConfig,
|
||||
OfflineDolphinModelConfig,
|
||||
OfflineTransducerModelConfig,
|
||||
OfflineParaformerModelConfig,
|
||||
|
||||
@@ -23,7 +23,7 @@ or update your `oh-package.json5` to include the following:
|
||||
|
||||
```
|
||||
"dependencies": {
|
||||
"sherpa_onnx": "1.12.4",
|
||||
"sherpa_onnx": "1.12.5",
|
||||
},
|
||||
```
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "sherpa_onnx",
|
||||
"version": "1.12.4",
|
||||
"version": "1.12.5",
|
||||
"description": "On-device speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without Internet connection",
|
||||
"main": "Index.ets",
|
||||
"author": "The next-gen Kaldi team",
|
||||
|
||||
@@ -93,6 +93,27 @@ static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig(
|
||||
return c;
|
||||
}
|
||||
|
||||
// Reads the optional "canary" member of `obj` into a C config struct.
//
// The struct is zero-filled first and use_pnc is preset to 1, so a missing or
// non-object "canary" member yields exactly that default-initialized struct.
// NOTE(review): the SHERPA_ONNX_ASSIGN_ATTR_* macros appear to rely on the
// locals being named `c` and `o` - confirm in the macro header before
// renaming them.
static SherpaOnnxOfflineCanaryModelConfig GetOfflineCanaryModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineCanaryModelConfig c;
  memset(&c, 0, sizeof(c));
  c.use_pnc = 1;  // Align default with JS default

  const bool has_canary = obj.Has("canary") && obj.Get("canary").IsObject();
  if (has_canary) {
    Napi::Object o = obj.Get("canary").As<Napi::Object>();

    SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
    SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
    SHERPA_ONNX_ASSIGN_ATTR_STR(src_lang, srcLang);
    SHERPA_ONNX_ASSIGN_ATTR_STR(tgt_lang, tgtLang);
    SHERPA_ONNX_ASSIGN_ATTR_INT32(use_pnc, usePnc);
  }

  return c;
}
|
||||
|
||||
static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig(
|
||||
Napi::Object obj) {
|
||||
SherpaOnnxOfflineWhisperModelConfig c;
|
||||
@@ -203,6 +224,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
|
||||
c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o);
|
||||
c.dolphin = GetOfflineDolphinModelConfig(o);
|
||||
c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
|
||||
c.canary = GetOfflineCanaryModelConfig(o);
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
|
||||
@@ -241,6 +263,78 @@ static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) {
|
||||
return c;
|
||||
}
|
||||
|
||||
// Builds a SherpaOnnxOfflineRecognizerConfig from the JS object `o`.
//
// The struct is zero-filled, the nested feature / model / LM / homophone
// replacer sections are filled by their dedicated helpers, and the remaining
// top-level attributes are copied with the SHERPA_ONNX_ASSIGN_ATTR_* macros.
// NOTE(review): the macros presumably reference the locals `c` and `o` by name
// and allocate the C strings that FreeConfig() later deletes - confirm in the
// macro header.
static SherpaOnnxOfflineRecognizerConfig ParseConfig(Napi::Object o) {
|
||||
SherpaOnnxOfflineRecognizerConfig c;
|
||||
memset(&c, 0, sizeof(c));
|
||||
// Nested sections, each parsed from the same JS object.
c.feat_config = GetFeatureConfig(o);
|
||||
c.model_config = GetOfflineModelConfig(o);
|
||||
c.lm_config = GetOfflineLMConfig(o);
|
||||
c.hr = GetHomophoneReplacerConfig(o);
|
||||
|
||||
// Top-level attributes: (C field name, JS property name).
SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) {
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);
|
||||
}
|
||||
|
||||
static Napi::External<SherpaOnnxOfflineRecognizer>
|
||||
CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
|
||||
Napi::Env env = info.Env();
|
||||
@@ -274,20 +368,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
|
||||
|
||||
Napi::Object o = info[0].As<Napi::Object>();
|
||||
|
||||
SherpaOnnxOfflineRecognizerConfig c;
|
||||
memset(&c, 0, sizeof(c));
|
||||
c.feat_config = GetFeatureConfig(o);
|
||||
c.model_config = GetOfflineModelConfig(o);
|
||||
c.lm_config = GetOfflineLMConfig(o);
|
||||
c.hr = GetHomophoneReplacerConfig(o);
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
|
||||
SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);
|
||||
|
||||
#if __OHOS__
|
||||
std::unique_ptr<NativeResourceManager,
|
||||
@@ -302,51 +383,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
|
||||
SherpaOnnxCreateOfflineRecognizer(&c);
|
||||
#endif
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);
|
||||
FreeConfig(c);
|
||||
|
||||
if (!recognizer) {
|
||||
Napi::TypeError::New(env, "Please check your config!")
|
||||
@@ -470,6 +507,43 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static void OfflineRecognizerSetConfigWrapper(const Napi::CallbackInfo &info) {
|
||||
Napi::Env env = info.Env();
|
||||
if (info.Length() != 2) {
|
||||
std::ostringstream os;
|
||||
os << "Expect only 2 arguments. Given: " << info.Length();
|
||||
|
||||
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (!info[0].IsExternal()) {
|
||||
Napi::TypeError::New(env,
|
||||
"Argument 0 should be an offline recognizer pointer.")
|
||||
.ThrowAsJavaScriptException();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (!info[1].IsObject()) {
|
||||
Napi::TypeError::New(env, "Expect an object as the second argument")
|
||||
.ThrowAsJavaScriptException();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
Napi::Object o = info[1].As<Napi::Object>();
|
||||
SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);
|
||||
|
||||
const SherpaOnnxOfflineRecognizer *recognizer =
|
||||
info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
|
||||
|
||||
SherpaOnnxOfflineRecognizerSetConfig(recognizer, &c);
|
||||
|
||||
FreeConfig(c);
|
||||
}
|
||||
|
||||
static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
|
||||
Napi::Env env = info.Env();
|
||||
if (info.Length() != 2) {
|
||||
@@ -548,6 +622,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) {
|
||||
exports.Set(Napi::String::New(env, "decodeOfflineStream"),
|
||||
Napi::Function::New(env, DecodeOfflineStreamWrapper));
|
||||
|
||||
exports.Set(Napi::String::New(env, "offlineRecognizerSetConfig"),
|
||||
Napi::Function::New(env, OfflineRecognizerSetConfigWrapper));
|
||||
|
||||
exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"),
|
||||
Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper));
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ export const voiceActivityDetectorFlush: (handle: object) => void;
|
||||
|
||||
export const createOfflineRecognizer: (config: object, mgr?: object) => object;
|
||||
export const createOfflineStream: (handle: object) => object;
|
||||
export const offlineRecognizerSetConfig: (handle: object, config: object) => void;
|
||||
export const acceptWaveformOffline: (handle: object, audio: object) => void;
|
||||
export const decodeOfflineStream: (handle: object, streamHandle: object) => void;
|
||||
export const getOfflineStreamResultAsJson: (streamHandle: object) => string;
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
createOfflineStream,
|
||||
decodeOfflineStream,
|
||||
getOfflineStreamResultAsJson,
|
||||
offlineRecognizerSetConfig,
|
||||
} from 'libsherpa_onnx.so';
|
||||
|
||||
export interface Samples {
|
||||
@@ -67,6 +68,14 @@ export class OfflineWhisperModelConfig {
|
||||
public tailPaddings: number = -1;
|
||||
}
|
||||
|
||||
export class OfflineCanaryModelConfig {
|
||||
public encoder: string = '';
|
||||
public decoder: string = '';
|
||||
public srcLang: string = '';
|
||||
public tgtLang: string = '';
|
||||
public usePnc: number = 1;
|
||||
}
|
||||
|
||||
export class OfflineTdnnModelConfig {
|
||||
public model: string = '';
|
||||
}
|
||||
@@ -102,6 +111,7 @@ export class OfflineModelConfig {
|
||||
public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig();
|
||||
public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
|
||||
public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
|
||||
public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig();
|
||||
}
|
||||
|
||||
export class OfflineLMConfig {
|
||||
@@ -151,6 +161,10 @@ export class OfflineRecognizer {
|
||||
this.config = config
|
||||
}
|
||||
|
||||
setConfig(config: OfflineRecognizerConfig) {
|
||||
offlineRecognizerSetConfig(this.handle, config);
|
||||
}
|
||||
|
||||
createStream(): OfflineStream {
|
||||
const handle: object = createOfflineStream(this.handle);
|
||||
return new OfflineStream(handle);
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"author": "",
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
"sherpa_onnx": "1.12.4"
|
||||
"sherpa_onnx": "1.12.5"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"author": "",
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
"sherpa_onnx": "1.12.4",
|
||||
"sherpa_onnx": "1.12.5",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"author": "",
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
"sherpa_onnx": "1.12.4",
|
||||
"sherpa_onnx": "1.12.5",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"author": "",
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
"sherpa_onnx": "1.12.4",
|
||||
"sherpa_onnx": "1.12.5",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Introduction
|
||||
|
||||
Please download ./sherpa_onnx-v1.12.4.har
|
||||
Please download ./sherpa_onnx-v1.12.5.har
|
||||
from <https://huggingface.co/csukuangfj/sherpa-onnx-harmony-os/tree/main/har>
|
||||
|
||||
Hint: For users who have no access to huggingface, please use
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
// please see https://ohpm.openharmony.cn/#/cn/detail/sherpa_onnx
|
||||
"sherpa_onnx": "1.12.4",
|
||||
"sherpa_onnx": "1.12.5",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
56
java-api-examples/NonStreamingDecodeFileNemoCanary.java
Normal file
56
java-api-examples/NonStreamingDecodeFileNemoCanary.java
Normal file
@@ -0,0 +1,56 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
// This file shows how to use an offline NeMo Canary model, i.e.,
|
||||
// a non-streaming NeMo Canary model, to decode files.
|
||||
import com.k2fsa.sherpa.onnx.*;
|
||||
|
||||
public class NonStreamingDecodeFileNemoCanary {
|
||||
public static void main(String[] args) {
|
||||
// please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
|
||||
// to download model files
|
||||
String encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
|
||||
String decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
|
||||
String tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
|
||||
|
||||
String waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
|
||||
|
||||
WaveReader reader = new WaveReader(waveFilename);
|
||||
|
||||
OfflineCanaryModelConfig canary =
|
||||
OfflineCanaryModelConfig.builder()
|
||||
.setEncoder(encoder)
|
||||
.setDecoder(decoder)
|
||||
.setSrcLang("en")
|
||||
.setTgtLang("en")
|
||||
.setUsePnc(true)
|
||||
.build();
|
||||
|
||||
OfflineModelConfig modelConfig =
|
||||
OfflineModelConfig.builder()
|
||||
.setCanary(canary)
|
||||
.setTokens(tokens)
|
||||
.setNumThreads(1)
|
||||
.setDebug(true)
|
||||
.build();
|
||||
|
||||
OfflineRecognizerConfig config =
|
||||
OfflineRecognizerConfig.builder()
|
||||
.setOfflineModelConfig(modelConfig)
|
||||
.setDecodingMethod("greedy_search")
|
||||
.build();
|
||||
|
||||
OfflineRecognizer recognizer = new OfflineRecognizer(config);
|
||||
OfflineStream stream = recognizer.createStream();
|
||||
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
|
||||
|
||||
recognizer.decode(stream);
|
||||
|
||||
String text = recognizer.getResult(stream).getText();
|
||||
|
||||
System.out.printf("filename:%s\nresult(English):%s\n", waveFilename, text);
|
||||
|
||||
stream.release();
|
||||
recognizer.release();
|
||||
}
|
||||
}
|
||||
@@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx.
|
||||
|
||||
```bash
|
||||
./run-non-streaming-decode-file-dolphin-ctc.sh
|
||||
./run-non-streaming-decode-file-fire-red-asr.sh
|
||||
./run-non-streaming-decode-file-moonshine.sh
|
||||
./run-non-streaming-decode-file-nemo-canary.sh
|
||||
./run-non-streaming-decode-file-nemo.sh
|
||||
./run-non-streaming-decode-file-paraformer.sh
|
||||
./run-non-streaming-decode-file-sense-voice.sh
|
||||
./run-non-streaming-decode-file-tele-speech-ctc.sh
|
||||
./run-non-streaming-decode-file-transducer-hotwords.sh
|
||||
./run-non-streaming-decode-file-transducer.sh
|
||||
./run-non-streaming-decode-file-whisper-multiple.sh
|
||||
./run-non-streaming-decode-file-whisper.sh
|
||||
./run-non-streaming-decode-file-nemo.sh
|
||||
./run-non-streaming-decode-file-zipformer-ctc.sh
|
||||
```
|
||||
|
||||
## Non-Streaming Speech recognition with homophone replacer
|
||||
|
||||
37
java-api-examples/run-non-streaming-decode-file-nemo-canary.sh
Executable file
37
java-api-examples/run-non-streaming-decode-file-nemo-canary.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
mkdir -p ../build
|
||||
pushd ../build
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
popd
|
||||
fi
|
||||
|
||||
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
pushd ../sherpa-onnx/java-api
|
||||
make
|
||||
popd
|
||||
fi
|
||||
|
||||
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
fi
|
||||
|
||||
java \
|
||||
-Djava.library.path=$PWD/../build/lib \
|
||||
-cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
|
||||
NonStreamingDecodeFileNemoCanary.java
|
||||
@@ -2,8 +2,8 @@ jdk:
|
||||
- openjdk17
|
||||
|
||||
before_install:
|
||||
- wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-1.12.4.aar
|
||||
- wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-1.12.5.aar
|
||||
|
||||
install:
|
||||
- FILE="-Dfile=sherpa-onnx-1.12.4.aar"
|
||||
- mvn install:install-file $FILE -DgroupId=com.k2fsa.sherpa.onnx -DartifactId=sherpa-onnx -Dversion=1.12.4 -Dpackaging=aar -DgeneratePom=true
|
||||
- FILE="-Dfile=sherpa-onnx-1.12.5.aar"
|
||||
- mvn install:install-file $FILE -DgroupId=com.k2fsa.sherpa.onnx -DartifactId=sherpa-onnx -Dversion=1.12.5 -Dpackaging=aar -DgeneratePom=true
|
||||
|
||||
@@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() {
|
||||
ls -lh $out_filename
|
||||
java -Djava.library.path=../build/lib -jar $out_filename
|
||||
}
|
||||
testVersion
|
||||
|
||||
function testOfflineNeMoCanary() {
|
||||
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
fi
|
||||
|
||||
out_filename=test_offline_nemo_canary.jar
|
||||
kotlinc-jvm -include-runtime -d $out_filename \
|
||||
test_offline_nemo_canary.kt \
|
||||
FeatureConfig.kt \
|
||||
HomophoneReplacerConfig.kt \
|
||||
OfflineRecognizer.kt \
|
||||
OfflineStream.kt \
|
||||
WaveReader.kt \
|
||||
faked-asset-manager.kt
|
||||
|
||||
ls -lh $out_filename
|
||||
java -Djava.library.path=../build/lib -jar $out_filename
|
||||
}
|
||||
|
||||
# testVersion
|
||||
|
||||
testOfflineNeMoCanary
|
||||
testOfflineSenseVoiceWithHr
|
||||
testOfflineSpeechDenoiser
|
||||
testOfflineSpeakerDiarization
|
||||
|
||||
48
kotlin-api-examples/test_offline_nemo_canary.kt
Normal file
48
kotlin-api-examples/test_offline_nemo_canary.kt
Normal file
@@ -0,0 +1,48 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
fun main() {
|
||||
val recognizer = createOfflineRecognizer()
|
||||
val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"
|
||||
|
||||
val objArray = WaveReader.readWaveFromFile(
|
||||
filename = waveFilename,
|
||||
)
|
||||
val samples: FloatArray = objArray[0] as FloatArray
|
||||
val sampleRate: Int = objArray[1] as Int
|
||||
|
||||
var stream = recognizer.createStream()
|
||||
stream.acceptWaveform(samples, sampleRate=sampleRate)
|
||||
recognizer.decode(stream)
|
||||
|
||||
var result = recognizer.getResult(stream)
|
||||
println("English: $result")
|
||||
|
||||
stream.release()
|
||||
|
||||
// now output text in German
|
||||
val config = recognizer.config.copy(modelConfig=recognizer.config.modelConfig.copy(
|
||||
canary=recognizer.config.modelConfig.canary.copy(
|
||||
tgtLang="de"
|
||||
)
|
||||
))
|
||||
recognizer.setConfig(config)
|
||||
|
||||
stream = recognizer.createStream()
|
||||
stream.acceptWaveform(samples, sampleRate=sampleRate)
|
||||
recognizer.decode(stream)
|
||||
|
||||
result = recognizer.getResult(stream)
|
||||
println("German: $result")
|
||||
|
||||
stream.release()
|
||||
recognizer.release()
|
||||
}
|
||||
|
||||
|
||||
fun createOfflineRecognizer(): OfflineRecognizer {
|
||||
val config = OfflineRecognizerConfig(
|
||||
modelConfig = getOfflineModelConfig(type = 32)!!,
|
||||
)
|
||||
|
||||
return OfflineRecognizer(config = config)
|
||||
}
|
||||
@@ -5,9 +5,9 @@ for speech recognition.
|
||||
|
||||
|Directory| Pre-built exe (x64)|Pre-built exe (x86)| Description|
|
||||
|---------|--------------------|-------------------|------------|
|
||||
|[./NonStreamingSpeechRecognition](./NonStreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-asr-x64-v1.12.4.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-asr-x86-v1.12.4.exe)| Non-streaming speech recognition|
|
||||
|[./StreamingSpeechRecognition](./StreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-streaming-asr-x64-v1.12.4.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-streaming-asr-x86-v1.12.4.exe)| Streaming speech recognition|
|
||||
|[./NonStreamingTextToSpeech](./NonStreamingTextToSpeech)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-tts-x64-v1.12.4.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-tts-x86-v1.12.4.exe)| Non-streaming text to speech|
|
||||
|[./NonStreamingSpeechRecognition](./NonStreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-asr-x64-v1.12.5.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-asr-x86-v1.12.5.exe)| Non-streaming speech recognition|
|
||||
|[./StreamingSpeechRecognition](./StreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-streaming-asr-x64-v1.12.5.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-streaming-asr-x86-v1.12.5.exe)| Streaming speech recognition|
|
||||
|[./NonStreamingTextToSpeech](./NonStreamingTextToSpeech)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-tts-x64-v1.12.5.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-tts-x86-v1.12.5.exe)| Non-streaming text to speech|
|
||||
|
||||
Caution: You need to use Windows and install Visual Studio 2022 in order to
|
||||
compile it.
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
old_version="1\.12\.3"
|
||||
new_version="1\.12\.4"
|
||||
old_version="1\.12\.4"
|
||||
new_version="1\.12\.5"
|
||||
replace_str="s/$old_version/$new_version/g"
|
||||
|
||||
sed -i.bak "$replace_str" ./sherpa-onnx/csrc/version.cc
|
||||
|
||||
@@ -123,6 +123,7 @@ The following tables list the examples in this folder.
|
||||
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
|
||||
|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|
||||
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|
||||
|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
|
||||
|[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
|
||||
|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
|
||||
|[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search|
|
||||
@@ -389,6 +390,16 @@ npm install naudiodon2
|
||||
node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with NeMo Canary models
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_nemo_canary.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with NeMo CTC models
|
||||
|
||||
```bash
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"sherpa-onnx-node": "^1.12.4"
|
||||
"sherpa-onnx-node": "^1.12.5"
|
||||
}
|
||||
}
|
||||
|
||||
62
nodejs-addon-examples/test_asr_non_streaming_nemo_canary.js
Normal file
62
nodejs-addon-examples/test_asr_non_streaming_nemo_canary.js
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
const sherpa_onnx = require('sherpa-onnx-node');
|
||||
|
||||
// Please download test files from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
const config = {
|
||||
'featConfig': {
|
||||
'sampleRate': 16000,
|
||||
'featureDim': 80,
|
||||
},
|
||||
'modelConfig': {
|
||||
'canary': {
|
||||
'encoder':
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx',
|
||||
'decoder':
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx',
|
||||
'srcLang': 'en',
|
||||
'tgtLang': 'en',
|
||||
'usePnc': 1,
|
||||
},
|
||||
'tokens':
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt',
|
||||
'numThreads': 2,
|
||||
'provider': 'cpu',
|
||||
'debug': 0,
|
||||
}
|
||||
};
|
||||
|
||||
const waveFilename =
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
|
||||
|
||||
const recognizer = new sherpa_onnx.OfflineRecognizer(config);
|
||||
console.log('Started')
|
||||
let start = Date.now();
|
||||
let stream = recognizer.createStream();
|
||||
const wave = sherpa_onnx.readWave(waveFilename);
|
||||
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
|
||||
|
||||
recognizer.decode(stream);
|
||||
let result = recognizer.getResult(stream)
|
||||
let stop = Date.now();
|
||||
console.log('Done')
|
||||
|
||||
const elapsed_seconds = (stop - start) / 1000;
|
||||
const duration = wave.samples.length / wave.sampleRate;
|
||||
const real_time_factor = elapsed_seconds / duration;
|
||||
console.log('Wave duration', duration.toFixed(3), 'seconds')
|
||||
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
|
||||
console.log(
|
||||
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
|
||||
real_time_factor.toFixed(3))
|
||||
console.log(waveFilename)
|
||||
console.log('result (English)\n', result)
|
||||
|
||||
stream = recognizer.createStream();
|
||||
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
|
||||
recognizer.config.modelConfig.canary.tgtLang = 'de';
|
||||
recognizer.setConfig(recognizer.config);
|
||||
|
||||
recognizer.decode(stream);
|
||||
result = recognizer.getResult(stream)
|
||||
console.log('result (German)\n', result)
|
||||
@@ -63,7 +63,7 @@ for text-to-speech.
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
|
||||
tar xf kokoro-en-v0_19.tar.bz2
|
||||
rm kokoro-en-v0_19.tar.bz2
|
||||
|
||||
@@ -154,6 +154,22 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
|
||||
node ./test-offline-dolphin-ctc.js
|
||||
```
|
||||
|
||||
## ./test-offline-nemo-canary.js
|
||||
|
||||
[./test-offline-nemo-canary.js](./test-offline-nemo-canary.js) demonstrates
|
||||
how to decode a file with a NeMo Canary model. In the code we use
|
||||
[sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french).
|
||||
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
|
||||
|
||||
node ./test-offline-nemo-canary.js
|
||||
```
|
||||
|
||||
## ./test-offline-zipformer-ctc.js
|
||||
|
||||
[./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"dependencies": {
|
||||
"mic": "^2.1.2",
|
||||
"naudiodon2": "^2.4.0",
|
||||
"sherpa-onnx": "^1.12.4",
|
||||
"sherpa-onnx": "^1.12.5",
|
||||
"wav": "^1.0.2"
|
||||
}
|
||||
}
|
||||
|
||||
56
nodejs-examples/test-offline-nemo-canary.js
Normal file
56
nodejs-examples/test-offline-nemo-canary.js
Normal file
@@ -0,0 +1,56 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
//
|
||||
const fs = require('fs');
|
||||
const {Readable} = require('stream');
|
||||
const wav = require('wav');
|
||||
|
||||
const sherpa_onnx = require('sherpa-onnx');
|
||||
|
||||
function createOfflineRecognizer() {
|
||||
let config = {
|
||||
modelConfig: {
|
||||
canary: {
|
||||
encoder:
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx',
|
||||
decoder:
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx',
|
||||
srcLang: 'en',
|
||||
tgtLang: 'en',
|
||||
usePnc: 1,
|
||||
},
|
||||
debug: 0,
|
||||
tokens:
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt',
|
||||
}
|
||||
};
|
||||
|
||||
return sherpa_onnx.createOfflineRecognizer(config);
|
||||
}
|
||||
|
||||
const recognizer = createOfflineRecognizer();
|
||||
let stream = recognizer.createStream();
|
||||
|
||||
const waveFilename =
|
||||
'./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
|
||||
const wave = sherpa_onnx.readWave(waveFilename);
|
||||
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||
|
||||
recognizer.decode(stream);
|
||||
let text = recognizer.getResult(stream).text;
|
||||
console.log(`text in English: ${text}`);
|
||||
|
||||
stream.free();
|
||||
|
||||
// now output German text
|
||||
recognizer.config.modelConfig.canary.tgtLang = 'de';
|
||||
recognizer.setConfig(recognizer.config);
|
||||
|
||||
stream = recognizer.createStream();
|
||||
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||
recognizer.decode(stream);
|
||||
text = recognizer.getResult(stream).text;
|
||||
|
||||
console.log(`text in German: ${text}`);
|
||||
|
||||
stream.free();
|
||||
recognizer.free();
|
||||
@@ -10,3 +10,4 @@ telespeech_ctc
|
||||
moonshine
|
||||
dolphin_ctc
|
||||
zipformer_ctc
|
||||
nemo_canary
|
||||
|
||||
107
pascal-api-examples/non-streaming-asr/nemo_canary.pas
Normal file
107
pascal-api-examples/non-streaming-asr/nemo_canary.pas
Normal file
@@ -0,0 +1,107 @@
|
||||
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||
|
||||
{
|
||||
This file shows how to use a non-streaming NeMo Canary model
|
||||
to decode files.
|
||||
|
||||
You can download the model files from
|
||||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
}
|
||||
|
||||
program nemo_canary;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  WaveFilename: AnsiString;
  Wave: TSherpaOnnxWave;

  Start, Stop: TDateTime;

{ Prints the current RecognitionResult followed by timing statistics for
  the interval Start..Stop. It reads the program-level variables directly,
  so the caller must have filled them in before calling. }
procedure PrintReport;
var
  Elapsed, Duration, RealTimeFactor: Single;
begin
  { Wall-clock seconds spent between Start and Stop. }
  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  { Length of the audio in seconds: number of samples over the sample rate. }
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
end;

begin
  Initialize(Config);

  { Model files and languages; both passes read the same English recording. }
  Config.ModelConfig.Canary.Encoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx';
  Config.ModelConfig.Canary.Decoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx';
  Config.ModelConfig.Canary.SrcLang := 'en';
  Config.ModelConfig.Canary.TgtLang := 'en';
  Config.ModelConfig.Canary.UsePnc := True;
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);

  { ---- Pass 1: target language 'en' ---- }
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);
  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;
  PrintReport;

  FreeAndNil(Stream);

  WriteLn('-----------Output German-----');

  { ---- Pass 2: same audio, target language switched to 'de' ---- }
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  { The reconfiguration happens after Start, so its cost is part of the
    elapsed time reported for this pass. }
  Config.ModelConfig.Canary.TgtLang := 'de';
  Recognizer.SetConfig(Config);
  Recognizer.Decode(Stream);
  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;
  PrintReport;

  { Release the stream and the recognizer. A short program like this one
    would get away without it, but larger projects must free them to avoid
    leaking memory. }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||
42
pascal-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
#
# Builds the sherpa-onnx C API as a shared library (if it is not installed
# under build/install/lib yet), downloads the NeMo Canary int8 model (if it
# is not present yet), compiles nemo_canary.pas with fpc and runs it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The model directory, nemo_canary.pas and the compiled binary are all
# addressed relative to this script's directory, so make that the working
# directory regardless of where the script was launched from.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./nemo_canary.pas

# Prepend the install directory to the loader search paths. The ":+" form
# appends the previous value only when there is one, so an unset variable
# does not leave an empty entry (which the loader treats as the current
# directory).
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./nemo_canary
|
||||
2
pom.xml
2
pom.xml
@@ -4,7 +4,7 @@
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>com.k2fsa.sherpa.onnx</groupId>
|
||||
<artifactId>sherpa-onnx-android</artifactId>
|
||||
<version>1.12.4</version>
|
||||
<version>1.12.5</version>
|
||||
<url>https://github.com/k2-fsa/sherpa-onnx</url>
|
||||
<packaging>pom</packaging>
|
||||
<description>First Android Library</description>
|
||||
|
||||
@@ -35,6 +35,25 @@ file(s) with a non-streaming model.
|
||||
/path/to/0.wav \
|
||||
/path/to/1.wav
|
||||
|
||||
also with RNN LM rescoring and LODR (optional):
|
||||
|
||||
./python-api-examples/offline-decode-files.py \
|
||||
--tokens=/path/to/tokens.txt \
|
||||
--encoder=/path/to/encoder.onnx \
|
||||
--decoder=/path/to/decoder.onnx \
|
||||
--joiner=/path/to/joiner.onnx \
|
||||
--num-threads=2 \
|
||||
--decoding-method=modified_beam_search \
|
||||
--debug=false \
|
||||
--sample-rate=16000 \
|
||||
--feature-dim=80 \
|
||||
--lm=/path/to/lm.onnx \
|
||||
--lm-scale=0.1 \
|
||||
--lodr-fst=/path/to/lodr.fst \
|
||||
--lodr-scale=-0.1 \
|
||||
/path/to/0.wav \
|
||||
/path/to/1.wav
|
||||
|
||||
(3) For CTC models from NeMo
|
||||
|
||||
python3 ./python-api-examples/offline-decode-files.py \
|
||||
@@ -269,6 +288,39 @@ def get_args():
|
||||
default="greedy_search",
|
||||
help="Valid values are greedy_search and modified_beam_search",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--lm",
|
||||
metavar="file",
|
||||
type=str,
|
||||
default="",
|
||||
help="Path to RNN LM model",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--lm-scale",
|
||||
metavar="lm_scale",
|
||||
type=float,
|
||||
default=0.1,
|
||||
help="LM model scale for rescoring",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--lodr-fst",
|
||||
metavar="file",
|
||||
type=str,
|
||||
default="",
|
||||
help="Path to LODR FST model. Used only when --lm is given.",
|
||||
)
|
||||
|
||||
# --lodr-scale: float option, default -0.1. The help text refers to the
# companion option by the name users actually type on the command line
# (--lodr-fst, with a hyphen).
parser.add_argument(
    "--lodr-scale",
    metavar="lodr_scale",
    type=float,
    default=-0.1,
    help="LODR scale for rescoring. Used only when --lodr-fst is given.",
)
|
||||
|
||||
parser.add_argument(
|
||||
"--debug",
|
||||
type=bool,
|
||||
@@ -364,6 +416,10 @@ def main():
|
||||
num_threads=args.num_threads,
|
||||
sample_rate=args.sample_rate,
|
||||
feature_dim=args.feature_dim,
|
||||
lm=args.lm,
|
||||
lm_scale=args.lm_scale,
|
||||
lodr_fst=args.lodr_fst,
|
||||
lodr_scale=args.lodr_scale,
|
||||
decoding_method=args.decoding_method,
|
||||
hotwords_file=args.hotwords_file,
|
||||
hotwords_score=args.hotwords_score,
|
||||
|
||||
@@ -21,6 +21,22 @@ rm sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
|
||||
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
|
||||
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav
|
||||
|
||||
or with RNN LM rescoring and LODR:
|
||||
|
||||
./python-api-examples/online-decode-files.py \
|
||||
--tokens=./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
|
||||
--encoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
|
||||
--decoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
|
||||
--joiner=./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
|
||||
--decoding-method=modified_beam_search \
|
||||
--lm=/path/to/lm.onnx \
|
||||
--lm-scale=0.1 \
|
||||
--lodr-fst=/path/to/lodr.fst \
|
||||
--lodr-scale=-0.1 \
|
||||
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav \
|
||||
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
|
||||
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav
|
||||
|
||||
(2) Streaming paraformer
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||
@@ -186,6 +202,22 @@ def get_args():
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--lodr-fst",
|
||||
metavar="file",
|
||||
type=str,
|
||||
default="",
|
||||
help="Path to LODR FST model. Used only when --lm is given.",
|
||||
)
|
||||
|
||||
# --lodr-scale: float option, default -0.1. The help text refers to the
# companion option by the name users actually type on the command line
# (--lodr-fst, with a hyphen).
parser.add_argument(
    "--lodr-scale",
    metavar="lodr_scale",
    type=float,
    default=-0.1,
    help="LODR scale for rescoring. Used only when --lodr-fst is given.",
)
|
||||
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
type=str,
|
||||
@@ -320,6 +352,8 @@ def main():
|
||||
max_active_paths=args.max_active_paths,
|
||||
lm=args.lm,
|
||||
lm_scale=args.lm_scale,
|
||||
lodr_fst=args.lodr_fst,
|
||||
lodr_scale=args.lodr_scale,
|
||||
hotwords_file=args.hotwords_file,
|
||||
hotwords_score=args.hotwords_score,
|
||||
modeling_unit=args.modeling_unit,
|
||||
|
||||
@@ -565,6 +565,38 @@ def get_models():
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8",
|
||||
idx=33,
|
||||
lang="en",
|
||||
lang2="English",
|
||||
short_name="parakeet_tdt_ctc_110m",
|
||||
cmd="""
|
||||
pushd $model_name
|
||||
|
||||
rm -rfv test_wavs
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8",
|
||||
idx=34,
|
||||
lang="ja",
|
||||
lang2="Japanese",
|
||||
short_name="parakeet-tdt_ctc_0.6b_ja",
|
||||
cmd="""
|
||||
pushd $model_name
|
||||
|
||||
rm -rfv test_wavs
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
|
||||
32
scripts/dotnet/OfflineCanaryModelConfig.cs
Normal file
32
scripts/dotnet/OfflineCanaryModelConfig.cs
Normal file
@@ -0,0 +1,32 @@
|
||||
/// Copyright (c) 2024.5 by 东风破
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
{
    /// <summary>
    /// Settings for a non-streaming NeMo Canary model.
    /// </summary>
    /// <remarks>
    /// The struct is laid out sequentially and its strings are marshalled as
    /// LPStr, so the order of the fields below must not be changed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineCanaryModelConfig
    {
        /// <summary>Path to the encoder model file.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        /// <summary>Path to the decoder model file.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        /// <summary>Language code of the input audio, e.g. "en".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string SrcLang;

        /// <summary>Language code of the output text, e.g. "en" or "de".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string TgtLang;

        /// <summary>
        /// Integer flag, 1 by default.
        /// NOTE(review): presumably enables punctuation and capitalization
        /// in the output -- confirm against the C API header.
        /// </summary>
        public int UsePnc;

        /// <summary>
        /// Creates a config with empty model paths, English as both source
        /// and target language, and <see cref="UsePnc"/> set to 1.
        /// </summary>
        public OfflineCanaryModelConfig()
        {
            this.Encoder = "";
            this.Decoder = "";
            this.SrcLang = "en";
            this.TgtLang = "en";
            this.UsePnc = 1;
        }
    }
}
|
||||
@@ -28,6 +28,7 @@ namespace SherpaOnnx
|
||||
FireRedAsr = new OfflineFireRedAsrModelConfig();
|
||||
Dolphin = new OfflineDolphinModelConfig();
|
||||
ZipformerCtc = new OfflineZipformerCtcModelConfig();
|
||||
Canary = new OfflineCanaryModelConfig();
|
||||
}
|
||||
public OfflineTransducerModelConfig Transducer;
|
||||
public OfflineParaformerModelConfig Paraformer;
|
||||
@@ -62,5 +63,6 @@ namespace SherpaOnnx
|
||||
public OfflineFireRedAsrModelConfig FireRedAsr;
|
||||
public OfflineDolphinModelConfig Dolphin;
|
||||
public OfflineZipformerCtcModelConfig ZipformerCtc;
|
||||
public OfflineCanaryModelConfig Canary;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,11 @@ namespace SherpaOnnx
|
||||
_handle = new HandleRef(this, h);
|
||||
}
|
||||
|
||||
/// <summary>
/// Hands <paramref name="config"/> by reference to the native
/// SherpaOnnxOfflineRecognizerSetConfig function, together with this
/// recognizer's native handle.
/// </summary>
/// <param name="config">The configuration to pass to the native call.</param>
public void SetConfig(OfflineRecognizerConfig config) =>
    SherpaOnnxOfflineRecognizerSetConfig(_handle.Handle, ref config);
|
||||
|
||||
public OfflineStream CreateStream()
|
||||
{
|
||||
IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle);
|
||||
@@ -65,6 +70,9 @@ namespace SherpaOnnx
|
||||
[DllImport(Dll.Filename)]
|
||||
private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config);
|
||||
|
||||
[DllImport(Dll.Filename)]
|
||||
private static extern void SherpaOnnxOfflineRecognizerSetConfig(IntPtr handle, ref OfflineRecognizerConfig config);
|
||||
|
||||
[DllImport(Dll.Filename)]
|
||||
private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle);
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user