Compare commits

...

10 Commits

Author SHA1 Message Date
37ad87e75c code change for mr_v100 2025-08-06 11:35:49 +08:00
Fangjun Kuang
0d44df9b67 Release v1.12.5 (#2368) 2025-07-10 15:31:26 +08:00
Fangjun Kuang
fd9a687ec2 Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)
Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and setter method on the offline recognizer.

- Define Canary model config in Pascal, Go, C#, Dart and update converter functions
- Add SetConfig API for offline recognizer (Pascal, Go, C#, Dart)
- Extend CI/workflows and example scripts to test non-streaming Canary decoding
2025-07-10 14:53:33 +08:00
Fangjun Kuang
e2b2d5ea57 Add CXX examples for NeMo TDT ASR. (#2363)
# New Features
- Added new example programs demonstrating streaming speech recognition from a microphone using Parakeet-TDT CTC and Zipformer Transducer models with voice activity detection.
- These examples support microphone input via PortAudio and display recognized text incrementally.

# Bug Fixes
- Improved error handling and logic when opening microphone devices in several example programs for more reliable device initialization.

# Chores
- Updated build configuration to include new executable examples when PortAudio support is enabled.
2025-07-09 18:30:42 +08:00
Askars Salimbajevs
f0960342ad Add LODR support to online and offline recognizers (#2026)
This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescoring.

- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
2025-07-09 16:23:46 +08:00
Fangjun Kuang
6122a678f5 Refactor exporting NeMo models (#2362)
Refactors and extends model export support to include new NeMo Parakeet TDT int8 variants for English and Japanese, updating the Kotlin API, export scripts, test runners, and CI workflows.

- Added support for two new int8 model types in OfflineRecognizer.kt.
- Enhanced Python export scripts to perform dynamic quantization and metadata injection.
- Updated shell scripts and GitHub workflows to package, test, and publish int8 model artifacts.
2025-07-09 16:02:12 +08:00
Fangjun Kuang
f1405779cf Fix nemo feature normalization in test code (#2361) 2025-07-08 15:41:56 +08:00
Fangjun Kuang
831aff187d Upload fp16 onnx model files for FireRedASR (#2360) 2025-07-08 13:46:03 +08:00
Fangjun Kuang
103e93d9f6 Add Java and Kotlin API for NeMo Canary models (#2359)
Add support for the NeMo Canary model in both Java and Kotlin APIs, wiring it through
JNI and updating examples and CI.

- Introduce OfflineCanaryModelConfig in Kotlin and Java with builder patterns
- Extend OfflineRecognizer to accept and apply the new canary config via setConfig
- Update JNI binding (GetOfflineConfig) and getOfflineModelConfig mapping (type 32), 
   plus examples and CI workflows
2025-07-08 13:45:26 +08:00
Fangjun Kuang
df4615ca1d Add C/CXX/JavaScript API for NeMo Canary models (#2357)
This PR introduces support for NeMo Canary models across C, C++, and JavaScript APIs 
by adding new Canary configuration structures, updating bindings, extending examples,
and enhancing CI workflows.

- Add OfflineCanaryModelConfig to all language bindings (C, C++, JS, ETS).
- Implement SetConfig methods and NAPI wrappers for updating recognizer config at runtime.
- Update examples and CI scripts to demonstrate and test NeMo Canary model usage.
2025-07-07 23:38:04 +08:00
156 changed files with 3813 additions and 218 deletions

View File

@@ -6,6 +6,11 @@ cd ./version-test
./run.sh
ls -lh
cd ../non-streaming-canary-decode-files
./run.sh
ls -lh
rm -rf sherpa-onnx-nemo-*
cd ../offline-decode-files
./run-zipformer-ctc.sh

View File

@@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")
echo "----------non-streaming ASR NeMo Canary----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
node ./test_asr_non_streaming_nemo_canary.js
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
echo "----------non-streaming ASR Zipformer CTC----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

View File

@@ -9,6 +9,14 @@ git status
ls -lh
ls -lh node_modules
# asr with offline nemo canary
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
node ./test-offline-nemo-canary.js
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
# asr with offline zipformer ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

View File

@@ -281,7 +281,39 @@ time $EXE \
$repo/test_wavs/1.wav \
$repo/test_wavs/8k.wav
rm -rf $repo
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
lm_repo=$(basename $lm_repo_url)
pushd $lm_repo
git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
popd
bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
log "Download bi-gram LM from ${bigram_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
bigramlm_repo=$(basename $bigram_repo_url)
pushd $bigramlm_repo
git lfs pull --include "2gram.fst"
popd
log "Start testing with LM and bi-gram LODR"
# TODO: find test examples that change with the LODR
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
--num-threads=2 \
--decoding_method="modified_beam_search" \
--lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
--lodr-fst=$bigramlm_repo/2gram.fst \
--lodr-scale=-0.5 \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/8k.wav
rm -rf $repo $lm_repo $bigramlm_repo
log "------------------------------------------------------------"
log "Run Paraformer (Chinese)"

View File

@@ -77,16 +77,6 @@ time $EXE \
$repo/test_wavs/DEV_T0000000001.wav \
$repo/test_wavs/DEV_T0000000002.wav
log "test int8"
time $EXE \
--debug=1 \
--zipformer2-ctc-model=$repo/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav \
$repo/test_wavs/DEV_T0000000001.wav \
$repo/test_wavs/DEV_T0000000002.wav
rm -rf $repo
log "------------------------------------------------------------"

View File

@@ -174,7 +174,60 @@ for wave in ${waves[@]}; do
$wave
done
rm -rf $repo
lm_repo_url=https://huggingface.co/vsd-vector/icefall-librispeech-rnn-lm
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
lm_repo=$(basename $lm_repo_url)
pushd $lm_repo
git lfs pull --include "with-state-epoch-99-avg-1.onnx"
popd
bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
log "Download bi-gram LM from ${bigram_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
bigramlm_repo=$(basename $bigram_repo_url)
pushd $bigramlm_repo
git lfs pull --include "2gram.fst"
popd
log "Start testing LODR"
waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/8k.wav
)
for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
--num-threads=2 \
--decoding_method="modified_beam_search" \
--lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
--lodr-fst=$bigramlm_repo/2gram.fst \
--lodr-scale=-0.5 \
$wave
done
for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
--num-threads=2 \
--decoding_method="modified_beam_search" \
--lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
--lodr-fst=$bigramlm_repo/2gram.fst \
--lodr-scale=-0.5 \
--lm-shallow-fusion=true \
$wave
done
rm -rf $repo $bigramlm_repo $lm_repo
log "------------------------------------------------------------"
log "Run streaming Zipformer transducer (Bilingual, Chinese + English)"

View File

@@ -562,9 +562,39 @@ python3 ./python-api-examples/offline-decode-files.py \
$repo/test_wavs/1.wav \
$repo/test_wavs/8k.wav
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
lm_repo=$(basename $lm_repo_url)
pushd $lm_repo
git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
popd
bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
log "Download bi-gram LM from ${bigram_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
bigramlm_repo=$(basename $bigram_repo_url)
pushd $bigramlm_repo
git lfs pull --include "2gram.fst"
popd
log "Perform offline decoding with RNN-LM and LODR"
python3 ./python-api-examples/offline-decode-files.py \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.onnx \
--decoding-method=modified_beam_search \
--lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
--lodr-fst=$bigramlm_repo/2gram.fst \
--lodr-scale=-0.5 \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/8k.wav
python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose
rm -rf $repo
rm -rf $repo $lm_repo $bigramlm_repo
log "Test non-streaming paraformer models"

View File

@@ -127,6 +127,36 @@ jobs:
rm -rf dict lexicon.txt test-hr.wav replace.fst
rm -v $name
- name: Test NeMo Canary
shell: bash
run: |
name=nemo-canary-c-api
gcc -o $name ./c-api-examples/$name.c \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-c-api \
-l onnxruntime
ls -lh $name
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
ldd ./$name
echo "----"
readelf -d ./$name
fi
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
./$name
rm $name
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
- name: Test Dolphin CTC
shell: bash
run: |

View File

@@ -87,6 +87,40 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
fi
- name: Test NeMo Canary
shell: bash
run: |
name=nemo-canary-cxx-api
g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-cxx-api \
-l sherpa-onnx-c-api \
-l onnxruntime
ls -lh $name
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
ldd ./$name
echo "----"
readelf -d ./$name
fi
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
ls -lh sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
echo "---"
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
./$name
rm -rf sherpa-onnx-nemo-canary-*
rm -v ./$name
- name: Test streaming zipformer with Homophone replacer
shell: bash
run: |

View File

@@ -61,6 +61,11 @@ jobs:
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
)
for m in ${models[@]}; do
@@ -89,6 +94,11 @@ jobs:
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
)
for d in ${dirs[@]}; do
tar cjvf ${d}.tar.bz2 ./$d

View File

@@ -54,13 +54,18 @@ jobs:
curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
popd
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
names=(
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
)
for d in ${names[@]}; do
cp -av test_wavs $d/
tar cjvf $d.tar.bz2 $d
done
- name: Release
uses: svenstaro/upload-release-action@v2
@@ -71,3 +76,41 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models
- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
models=(
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
)
for m in ${models[@]}; do
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
git lfs track "*.wav"
git status
git add .
git status
git commit -m "first commit"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
cd ..
done

View File

@@ -61,6 +61,11 @@ jobs:
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
)
for m in ${models[@]}; do
@@ -88,6 +93,11 @@ jobs:
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
)
for d in ${dirs[@]}; do
tar cjvf ${d}.tar.bz2 ./$d

View File

@@ -54,13 +54,18 @@ jobs:
curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
popd
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
models=(
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
)
for m in ${models[@]}; do
cp -av test_wavs $m
tar cjvf $m.tar.bz2 $m
done
- name: Release
uses: svenstaro/upload-release-action@v2
@@ -71,3 +76,41 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models
- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
models=(
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
)
for m in ${models[@]}; do
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
git lfs track "*.wav"
git status
git add .
git status
git commit -m "first commit"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
cd ..
done

View File

@@ -0,0 +1,105 @@
# Workflow: exports an int8 NeMo Parakeet TDT-CTC model, publishes it to
# Hugging Face and uploads the packaged tarball to the asr-models release.
name: export-nemo-parakeet-tdt
# Runs on pushes to the refactor-export-nemo branch and on manual dispatch.
on:
push:
branches:
- refactor-export-nemo
workflow_dispatch:
# One run per ref; a newer run cancels the one still in progress.
concurrency:
group: export-nemo-parakeet-tdt-${{ github.ref }}
cancel-in-progress: true
jobs:
# NOTE(review): the job id mentions "0_6b-v2" but the steps below export the
# parakeet-tdt_ctc-0.6b-ja model -- confirm the id is intentional.
export-nemo-parakeet-tdt-0_6b-v2:
# Only run for the k2-fsa and csukuangfj owners, not for other forks.
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
name: parakeet tdt
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest]
python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install python dependencies
shell: bash
run: |
pip install \
nemo_toolkit['asr'] \
"numpy<2" \
ipython \
kaldi-native-fbank \
librosa \
onnx==1.17.0 \
onnxmltools==1.13.0 \
onnxruntime==1.17.1 \
soundfile
- name: Run
shell: bash
run: |
cd scripts/nemo/parakeet-tdt_ctc-0.6b-ja
./run-ctc.sh
- name: Collect files
shell: bash
run: |
models=(
sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
)
for m in ${models[@]}; do
mv -v scripts/nemo/parakeet-tdt_ctc-0.6b-ja/$m .
tar cjfv $m.tar.bz2 $m
done
- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
models=(
sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
)
for m in ${models[@]}; do
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
git lfs track "*.wav"
git status
git add .
git status
git commit -m "first commit"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
cd ..
done
- name: Release
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models

View File

@@ -156,6 +156,10 @@ jobs:
pushd non-streaming-asr
./run-nemo-canary.sh
rm -rf sherpa-onnx-*
echo "---"
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo "---"

View File

@@ -117,6 +117,13 @@ jobs:
cd ./java-api-examples
./run-version-test.sh
- name: Run java test (Nemo Canary)
shell: bash
run: |
cd ./java-api-examples
./run-non-streaming-decode-file-nemo-canary.sh
rm -rf sherpa-onnx-nemo-*
- name: Run java test (Non-streaming SenseVoice with homophone replacer)
shell: bash
run: |

View File

@@ -76,6 +76,14 @@ jobs:
run: |
gcc --version
- name: Test NeMo Canary ASR
if: matrix.os != 'windows-latest'
shell: bash
run: |
cd go-api-examples/non-streaming-canary-decode-files
./run.sh
rm -rf sherpa-onnx-nemo-*
- name: Test speech enhancement (GTCRN)
if: matrix.os != 'windows-latest'
shell: bash

View File

@@ -108,6 +108,7 @@ jobs:
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
@@ -148,6 +149,19 @@ jobs:
name: ${{ matrix.os }}-libs
path: to-upload/
- name: Test non-streaming decoding files with NeMo Canary
shell: bash
run: |
cd scripts/go/_internal/non-streaming-canary-decode-files/
ls -lh
go mod tidy
cat go.mod
go build
ls -lh
./run.sh
rm -rf sherpa-onnx-nemo-*
- name: Test streaming decoding files
shell: bash
run: |

View File

@@ -24,7 +24,45 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: git config
shell: bash
run: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
- name: FireRed ASR fp16
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 hf
git lfs install
git clone https://www.modelscope.cn/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16.git ms
d=sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d
mv -v hf/test_wavs $d
mv -v hf/README.md $d
mv -v hf/tokens.txt $d
mv -v ms/*.onnx $d
pushd $d
git lfs track "*.onnx"
git lfs track "*.wav"
git status
git add .
git commit -m "add models"
ls -lh
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
popd
rm -rf $d/.git
rm -rf $d/.gitattributes
tar cjvf $d.tar.bz2 $d
- name: Zipformer CTC (non-streaming)
if: false
shell: bash
run: |
git lfs install

View File

@@ -1,3 +1,22 @@
## 1.12.5
* Fix typo CMAKE_EXECUTBLE_LINKER_FLAGS -> CMAKE_EXECUTABLE_LINKER_FLAGS (#2344)
* Fix testing dart packages (#2345)
* fix(canary): use dynamo export, single input_ids and avoid 0/1 specialization (#2348)
* Fix TTS for Unreal Engine (#2349)
* Update readme to include https://github.com/mawwalker/stt-server (#2350)
* Add meta data to NeMo canary ONNX models (#2351)
* Update README to include https://github.com/bbeyondllove/asr_server (#2353)
* Add C++ runtime and Python API for NeMo Canary models (#2352)
* Add C/CXX/JavaScript API for NeMo Canary models (#2357)
* Add Java and Kotlin API for NeMo Canary models (#2359)
* Upload fp16 onnx model files for FireRedASR (#2360)
* Fix nemo feature normalization in test code (#2361)
* Refactor exporting NeMo models (#2362)
* Add LODR support to online and offline recognizers (#2026)
* Add CXX examples for NeMo TDT ASR. (#2363)
* Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)
## 1.12.4
* Refactor release scripts. (#2323)

View File

@@ -14,7 +14,7 @@ project(sherpa-onnx)
# Remember to update
# ./CHANGELOG.md
# ./new-release.sh
set(SHERPA_ONNX_VERSION "1.12.4")
set(SHERPA_ONNX_VERSION "1.12.5")
# Disable warning about
#

View File

@@ -4,8 +4,8 @@
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-v1.12.4-android.tar.bz2
tar xvf sherpa-onnx-v1.12.4-android.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-v1.12.5-android.tar.bz2
tar xvf sherpa-onnx-v1.12.5-android.tar.bz2
cp -v jniLibs/arm64-v8a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/
cp -v jniLibs/armeabi-v7a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/
@@ -16,5 +16,5 @@ cd android/SherpaOnnxAar
./gradlew :sherpa_onnx:assembleRelease
ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.12.4.aar
cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.12.5.aar
```

View File

@@ -34,5 +34,5 @@ dependencies {
implementation 'pub.devrel:easypermissions:3.0.0'
implementation 'androidx.core:core-ktx:1.7.0'
// implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar')
implementation 'com.github.k2-fsa:sherpa-onnx:v1.12.4'
implementation 'com.github.k2-fsa:sherpa-onnx:v1.12.5'
}

View File

@@ -242,7 +242,7 @@ for d in ios-arm64_x86_64-simulator ios-arm64; do
<key>CFBundlePackageType</key>
<string>FMWK</string>
<key>CFBundleShortVersionString</key>
<string>1.12.4</string>
<string>1.12.5</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>iPhoneOS</string>

View File

@@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)
target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)
add_executable(nemo-canary-c-api nemo-canary-c-api.c)
target_link_libraries(nemo-canary-c-api sherpa-onnx-c-api)
add_executable(sense-voice-c-api sense-voice-c-api.c)
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)

View File

@@ -0,0 +1,115 @@
// c-api-examples/nemo-canary-c-api.c
//
// Copyright (c) 2025 Xiaomi Corporation
// We assume you have pre-downloaded the NeMo Canary model
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
//
// clang-format on
//
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
// for details
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
// Runs one complete decoding pass over `wave` and prints the recognized text
// to stderr as "Decoded text (<language>): <text>".
//
// A fresh offline stream is created from `recognizer`, fed every sample of
// `wave`, decoded, and then released together with its result.
//
// @param recognizer  Recognizer used to create and decode the stream.
// @param wave        Audio to decode.
// @param language    Label printed inside the parentheses of the output line.
// @return 0 on success, -1 if the stream or its result is NULL.
static int32_t DecodeAndPrint(const SherpaOnnxOfflineRecognizer *recognizer,
                              const SherpaOnnxWave *wave,
                              const char *language) {
  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);
  if (stream == NULL) {
    fprintf(stderr, "Failed to create an offline stream\n");
    return -1;
  }

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);

  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);
  if (result == NULL) {
    fprintf(stderr, "Failed to get the recognition result\n");
    SherpaOnnxDestroyOfflineStream(stream);
    return -1;
  }

  fprintf(stderr, "Decoded text (%s): %s\n", language, result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  return 0;
}

// Decodes the bundled German test wave twice with a NeMo Canary model:
// first with the target language set to English, then, after updating the
// recognizer config in place, with the target language set to German.
//
// Returns 0 on success and -1 if the wave cannot be read, the recognizer
// cannot be created, or a decoding pass fails.
int32_t main() {
  const char *wav_filename =
      "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
  const char *encoder_filename =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Offline model config. Zero it first so every field not set below is 0/NULL.
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));

  // Set debug to 1 to view more logs.
  offline_model_config.debug = 0;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.canary.encoder = encoder_filename;
  offline_model_config.canary.decoder = decoder_filename;

  // Enable punctuation and casing in the output.
  offline_model_config.canary.use_pnc = 1;
  offline_model_config.canary.src_lang = "de";

  // The input audio is German, so tgt_lang can be either "en" or "de".
  offline_model_config.canary.tgt_lang = "en";

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  int32_t status = DecodeAndPrint(recognizer, wave, "English");

  if (status == 0) {
    // Now output German text: change only the target language and push the
    // updated config into the existing recognizer.
    recognizer_config.model_config.canary.tgt_lang = "de";
    SherpaOnnxOfflineRecognizerSetConfig(recognizer, &recognizer_config);

    status = DecodeAndPrint(recognizer, wave, "German");
  }

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return status;
}

View File

@@ -54,7 +54,7 @@ int32_t main() {
"DEV_T0000000000.wav";
const char *model_filename =
"sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/"
"ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx";
"ctc-epoch-20-avg-1-chunk-16-left-128.onnx";
const char *tokens_filename =
"sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt";
const char *provider = "cpu";

View File

@@ -180,8 +180,8 @@ class BuildExtension(build_ext):
if make_args == "" and system_make_args == "":
print("for fast compilation, run:")
print('export SHERPA_ONNX_MAKE_ARGS="-j"; python setup.py install')
print('Setting make_args to "-j4"')
make_args = "-j4"
print('Setting make_args to "-j8"')
make_args = "-j8"
if "-G Ninja" in cmake_args:
build_cmd = f"""

View File

@@ -19,9 +19,9 @@ if(NOT SHERPA_ONNX_ENABLE_GPU)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=1261de176e8d9d4d2019f8fa8c732c6d11494f3c6e73168ab6d2cc0903f22551")
set(onnxruntime_URL "ftp://ftp.4pd.io/pub/iluvatar/mr_v100/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
set(onnxruntime_URL2 "ftp://ftp.4pd.io/pub/iluvatar/mr_v100/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=8ae0625c2a9b110ff70768733c92e6585c875e16c50abd1015f0358dd41498ee")
# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
@@ -70,7 +70,7 @@ add_library(onnxruntime SHARED IMPORTED)
set_target_properties(onnxruntime PROPERTIES
IMPORTED_LOCATION ${location_onnxruntime}
INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include/onnxruntime"
)
find_library(location_onnxruntime_cuda_lib onnxruntime_providers_cuda

View File

@@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
if(SHERPA_ONNX_ENABLE_PORTAUDIO)
add_executable(sense-voice-simulate-streaming-microphone-cxx-api
./sense-voice-simulate-streaming-microphone-cxx-api.cc
@@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
portaudio_static
)
add_executable(parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api
./parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
)
target_link_libraries(parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api
sherpa-onnx-cxx-api
portaudio_static
)
add_executable(zipformer-ctc-simulate-streaming-microphone-cxx-api
./zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
@@ -54,6 +66,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
sherpa-onnx-cxx-api
portaudio_static
)
add_executable(zipformer-transducer-simulate-streaming-microphone-cxx-api
./zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
)
target_link_libraries(zipformer-transducer-simulate-streaming-microphone-cxx-api
sherpa-onnx-cxx-api
portaudio_static
)
endif()
if(SHERPA_ONNX_HAS_ALSA)

View File

@@ -0,0 +1,101 @@
// cxx-api-examples/nemo-canary-cxx-api.cc
//
// Copyright (c) 2025 Xiaomi Corporation
//
// This file demonstrates how to use NeMo Canary models with
// sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
//
// clang-format on
//
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
// for details
#include <chrono> // NOLINT
#include <iostream>
#include <string>
#include "sherpa-onnx/c-api/cxx-api.h"
// Decodes a German test wave with a NeMo Canary model twice using one
// recognizer: first with English as the target language (this pass is timed
// and its real time factor is printed), then, after SetConfig(), with German.
// Returns 0 on success and -1 if the recognizer cannot be created or the
// wave file yields no samples.
int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OfflineRecognizerConfig config;
  auto &model = config.model_config;

  model.num_threads = 1;
  model.tokens =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";

  auto &canary = model.canary;
  canary.encoder =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  canary.decoder =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";

  // The test wave contains German speech, hence src_lang is "de".
  canary.src_lang = "de";

  // For this wave tgt_lang may be "de" or "en"; the first pass uses English.
  canary.tgt_lang = "en";

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";

  // First pass (timed): stream creation, feeding samples, decoding and
  // fetching the result are all inside the measured interval.
  const auto tic = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  recognizer.Decode(&stream);
  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto toc = std::chrono::steady_clock::now();

  // Elapsed time is measured in whole milliseconds, then converted to seconds.
  const auto elapsed_ms =
      std::chrono::duration_cast<std::chrono::milliseconds>(toc - tic).count();
  const float elapsed_seconds = elapsed_ms / 1000.;

  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text (English): " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  // Second pass: switch the target language to German on the existing
  // recognizer and decode the same samples again.
  canary.tgt_lang = "de";
  recognizer.SetConfig(config);

  stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  recognizer.Decode(&stream);
  result = recognizer.GetResult(&stream);

  std::cout << "text (German): " << result.text << "\n";

  return 0;
}

View File

@@ -0,0 +1,238 @@
// cxx-api-examples/parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
// Copyright (c) 2025 Xiaomi Corporation
//
// This file demonstrates how to use parakeet-tdt with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
// rm sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
//
// clang-format on
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <chrono> // NOLINT
#include <condition_variable> // NOLINT
#include <iostream>
#include <mutex> // NOLINT
#include <queue>
#include <vector>
#include "portaudio.h" // NOLINT
#include "sherpa-display.h" // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"
// State shared between RecordCallback(), Handler() and main().
// Chunks of float samples: pushed by RecordCallback(), popped by main().
std::queue<std::vector<float>> samples_queue;
// Notified by RecordCallback() after each push and by Handler() on SIGINT;
// main() waits on it while samples_queue is empty.
std::condition_variable condition_variable;
// Locked by RecordCallback() and by main() around accesses to samples_queue.
std::mutex mutex;
// Set to true by Handler(); read by main() and RecordCallback() to stop.
bool stop = false;
// Signal handler installed for SIGINT by main(). It sets `stop`, wakes up one
// thread waiting on `condition_variable` and prints a message to stderr.
// NOTE(review): notify_one() and fprintf() are presumably not
// async-signal-safe and `stop` is a plain bool - confirm this is acceptable
// for an example program.
static void Handler(int32_t /*sig*/) {
stop = true;
condition_variable.notify_one();
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
// Callback passed to Microphone::OpenDevice() in main(). It copies the
// `frames_per_buffer` float samples found in `input_buffer` into a new chunk
// at the back of `samples_queue` and notifies `condition_variable`.
//
// Returns paComplete once `stop` has been set, paContinue otherwise.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> guard(mutex);

  const auto *first = reinterpret_cast<const float *>(input_buffer);
  const auto *last = first + frames_per_buffer;

  // Constructs a std::vector<float> in place from the range [first, last).
  samples_queue.emplace(first, last);
  condition_variable.notify_one();

  if (stop) {
    return paComplete;
  }

  return paContinue;
}
// Creates the voice activity detector used by main() from the silero VAD
// model at ./silero_vad.onnx, configured for 16 kHz input.
//
// Prints an error and terminates the process with exit(-1) if the detector
// cannot be created.
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  namespace sx = sherpa_onnx::cxx;

  sx::VadModelConfig vad_config;
  vad_config.sample_rate = 16000;
  vad_config.debug = false;

  auto &silero = vad_config.silero_vad;
  silero.model = "./silero_vad.onnx";
  silero.threshold = 0.25;
  silero.min_silence_duration = 0.25;
  silero.min_speech_duration = 0.25;
  silero.max_speech_duration = 5;

  sx::VoiceActivityDetector detector =
      sx::VoiceActivityDetector::Create(vad_config, 60);
  if (!detector.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return detector;
}
// Creates the offline recognizer used by main() from the int8 NeMo
// parakeet-tdt_ctc model, configured through `nemo_ctc` with 2 threads.
//
// Prints an error and terminates the process with exit(-1) if the recognizer
// cannot be created.
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  namespace sx = sherpa_onnx::cxx;

  sx::OfflineRecognizerConfig asr_config;

  auto &model = asr_config.model_config;
  model.num_threads = 2;
  model.debug = false;
  model.tokens =
      "./sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8/tokens.txt";
  model.nemo_ctc.model =
      "./sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8/model.int8.onnx";

  std::cout << "Loading model\n";
  sx::OfflineRecognizer asr = sx::OfflineRecognizer::Create(asr_config);
  if (!asr.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";

  return asr;
}
// Simulated streaming recognition from a microphone.
//
// Audio chunks delivered by RecordCallback() are appended to `buffer`
// (resampled to 16 kHz first when the microphone uses another rate) and fed
// to the VAD in windows of `window_size` samples. While speech is in
// progress the whole buffer is re-decoded roughly every 0.2 seconds to show
// a partial result; each segment completed by the VAD is decoded once more
// and finalized on the display.
//
// Environment variables read:
//   SHERPA_ONNX_MIC_DEVICE       index of the input device to open
//   SHERPA_ONNX_MIC_SAMPLE_RATE  sample rate used to open the microphone
//
// Returns 0 after Ctrl + C, or -1 if no audio device is reported or the
// microphone cannot be opened.
int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << " If you are using Linux, please try to modify "
                 "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    mic_sample_rate = atof(sample_rate_str);
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
  }

  // Sample rate passed to the recognizer; the VAD is configured for the same
  // rate in CreateVad().
  float sample_rate = 16000;

  // Stays empty (Get() returns null) unless the microphone rate differs.
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  // Index into `buffer` of the first sample not yet given to the VAD.
  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    std::vector<float> chunk;
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }

      // The wait above also ends when Ctrl + C sets `stop`; in that case the
      // queue may still be empty and calling front() on it would be
      // undefined behavior.
      if (samples_queue.empty()) {
        break;
      }

      // Take ownership of the chunk so that the lock can be released before
      // resampling; RecordCallback() needs the same mutex.
      chunk = std::move(samples_queue.front());
      samples_queue.pop();
    }

    if (!resampler.Get()) {
      buffer.insert(buffer.end(), chunk.begin(), chunk.end());
    } else {
      auto resampled = resampler.Resample(chunk.data(), chunk.size(), false);
      buffer.insert(buffer.end(), resampled.begin(), resampled.end());
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }

    // While no speech has been detected keep only the last 10 windows and
    // shift `offset` by the number of samples dropped from the front.
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    // Partial result: decode everything buffered so far.
    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    // Final result: decode each segment the VAD has completed, then start
    // over with an empty buffer.
    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}

View File

@@ -136,11 +136,7 @@ int32_t main() {
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
mic_sample_rate = atof(sample_rate_str);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
std::cerr << "Failed to open microphone device\n";
return -1;
}
float sample_rate = 16000;
LinearResampler resampler;
if (mic_sample_rate != sample_rate) {
@@ -152,6 +148,12 @@ int32_t main() {
lowpass_cutoff, lowpass_filter_width);
}
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr)) {
std::cerr << "Failed to open microphone device\n";
return -1;
}
int32_t window_size = 512; // samples, please don't change
int32_t offset = 0;

View File

@@ -142,8 +142,8 @@ int32_t main() {
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
lowpass_cutoff, lowpass_filter_width);
}
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr)) {
std::cerr << "Failed to open microphone device\n";
return -1;
}

View File

@@ -140,8 +140,8 @@ int32_t main() {
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
lowpass_cutoff, lowpass_filter_width);
}
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr)) {
std::cerr << "Failed to open microphone device\n";
return -1;
}

View File

@@ -0,0 +1,245 @@
// cxx-api-examples/zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
// Copyright (c) 2025 Xiaomi Corporation
//
// This file demonstrates how to use Zipformer transducer with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
// tar xvf sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
// rm sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
//
// clang-format on
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <chrono> // NOLINT
#include <condition_variable> // NOLINT
#include <iostream>
#include <mutex> // NOLINT
#include <queue>
#include <vector>
#include "portaudio.h" // NOLINT
#include "sherpa-display.h" // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"
// State shared between RecordCallback(), Handler() and main().
// Chunks of float samples: pushed by RecordCallback(), popped by main().
std::queue<std::vector<float>> samples_queue;
// Notified by RecordCallback() after each push and by Handler() on SIGINT;
// main() waits on it while samples_queue is empty.
std::condition_variable condition_variable;
// Locked by RecordCallback() and by main() around accesses to samples_queue.
std::mutex mutex;
// Set to true by Handler(); read by main() and RecordCallback() to stop.
bool stop = false;
// Signal handler installed for SIGINT by main(). It sets `stop`, wakes up one
// thread waiting on `condition_variable` and prints a message to stderr.
// NOTE(review): notify_one() and fprintf() are presumably not
// async-signal-safe and `stop` is a plain bool - confirm this is acceptable
// for an example program.
static void Handler(int32_t /*sig*/) {
stop = true;
condition_variable.notify_one();
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
// Callback handed to Microphone::OpenDevice() by main(). Each call appends
// one chunk holding the `frames_per_buffer` float samples of `input_buffer`
// to `samples_queue` and notifies `condition_variable`.
//
// Returns paComplete after `stop` has been set and paContinue before that.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> guard(mutex);

  const float *samples = reinterpret_cast<const float *>(input_buffer);

  // Builds the std::vector<float> in place from the pointer range.
  samples_queue.emplace(samples, samples + frames_per_buffer);
  condition_variable.notify_one();

  if (!stop) {
    return paContinue;
  }

  return paComplete;
}
// Creates the voice activity detector used by main() from the silero VAD
// model at ./silero_vad.onnx, configured for 16 kHz input.
//
// Prints an error and terminates the process with exit(-1) if the detector
// cannot be created.
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  namespace sx = sherpa_onnx::cxx;

  sx::VadModelConfig vad_config;
  vad_config.sample_rate = 16000;
  vad_config.debug = false;

  auto &silero = vad_config.silero_vad;
  silero.model = "./silero_vad.onnx";
  silero.threshold = 0.5;
  silero.min_silence_duration = 0.1;
  silero.min_speech_duration = 0.25;
  silero.max_speech_duration = 8;

  sx::VoiceActivityDetector detector =
      sx::VoiceActivityDetector::Create(vad_config, 20);
  if (!detector.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return detector;
}
// Creates the offline recognizer used by main() from the ReazonSpeech
// Zipformer transducer model (encoder, decoder and joiner files) with
// 2 threads.
//
// Prints an error and terminates the process with exit(-1) if the recognizer
// cannot be created.
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  namespace sx = sherpa_onnx::cxx;

  sx::OfflineRecognizerConfig asr_config;

  auto &model = asr_config.model_config;
  model.num_threads = 2;
  model.debug = false;
  model.tokens =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt";

  auto &transducer = model.transducer;
  transducer.encoder =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "encoder-epoch-99-avg-1.int8.onnx";
  transducer.decoder =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "decoder-epoch-99-avg-1.onnx";
  transducer.joiner =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "joiner-epoch-99-avg-1.int8.onnx";

  std::cout << "Loading model\n";
  sx::OfflineRecognizer asr = sx::OfflineRecognizer::Create(asr_config);
  if (!asr.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";

  return asr;
}
// Simulated streaming recognition from a microphone.
//
// Audio chunks delivered by RecordCallback() are appended to `buffer`
// (resampled to 16 kHz first when the microphone uses another rate) and fed
// to the VAD in windows of `window_size` samples. While speech is in
// progress the whole buffer is re-decoded roughly every 0.2 seconds to show
// a partial result; each segment completed by the VAD is decoded once more
// and finalized on the display.
//
// Environment variables read:
//   SHERPA_ONNX_MIC_DEVICE       index of the input device to open
//   SHERPA_ONNX_MIC_SAMPLE_RATE  sample rate used to open the microphone
//
// Returns 0 after Ctrl + C, or -1 if no audio device is reported or the
// microphone cannot be opened.
int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << " If you are using Linux, please try "
                 "./build/bin/zipformer-ctc-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    // Parse first so that the message reports the rate actually requested
    // instead of the 16000 default.
    mic_sample_rate = atof(sample_rate_str);
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
  }

  // Sample rate passed to the recognizer; the VAD is configured for the same
  // rate in CreateVad().
  float sample_rate = 16000;

  // Stays empty (Get() returns null) unless the microphone rate differs.
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  // Index into `buffer` of the first sample not yet given to the VAD.
  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    std::vector<float> chunk;
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }

      // The wait above also ends when Ctrl + C sets `stop`; in that case the
      // queue may still be empty and calling front() on it would be
      // undefined behavior.
      if (samples_queue.empty()) {
        break;
      }

      // Take ownership of the chunk so that the lock can be released before
      // resampling; RecordCallback() needs the same mutex.
      chunk = std::move(samples_queue.front());
      samples_queue.pop();
    }

    if (!resampler.Get()) {
      buffer.insert(buffer.end(), chunk.begin(), chunk.end());
    } else {
      auto resampled = resampler.Resample(chunk.data(), chunk.size(), false);
      buffer.insert(buffer.end(), resampled.begin(), resampled.end());
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }

    // While no speech has been detected keep only the last 10 windows and
    // shift `offset` by the number of samples dropped from the front.
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    // Partial result: decode everything buffered so far.
    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    // Final result: decode each segment the VAD has completed, then start
    // over with an empty buffer.
    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}

View File

@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0

View File

@@ -0,0 +1,84 @@
// Copyright (c) 2025 Xiaomi Corporation
import 'dart:io';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Decodes a wave file with a NeMo Canary model.
///
/// All of `--encoder`, `--decoder`, `--src-lang`, `--tgt-lang`, `--tokens`
/// and `--input-wav` are required; if any of them is missing the usage text
/// is printed and the process exits with code 1.
///
/// The wave is first decoded into `--tgt-lang`. When that language is not
/// `en`, the recognizer is reconfigured via `setConfig` and the same wave is
/// decoded once more into English.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the NeMo Canary encoder model')
    ..addOption('decoder', help: 'Path to the NeMo Canary decoder model')
    ..addOption('src-lang', help: 'Language of the input audio')
    ..addOption('tgt-lang', help: 'Language of the recognition result')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['src-lang'] == null ||
      res['tgt-lang'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final srcLang = res['src-lang'] as String;
  final tgtLang = res['tgt-lang'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final canary = sherpa_onnx.OfflineCanaryModelConfig(
      encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    canary: canary,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );

  var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print('Result in $tgtLang: ${result.text}');

  stream.free();

  // Example of changing the target language of an existing recognizer: unless
  // English was already requested, switch tgtLang to 'en' and decode again.
  if (tgtLang != 'en') {
    final json = config.toJson();

    // The casts already yield non-nullable maps, so no `!` is needed.
    final modelJson = json['model'] as Map<String, dynamic>;
    final canaryJson = modelJson['canary'] as Map<String, dynamic>;
    canaryJson['tgtLang'] = 'en';

    config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json);
    recognizer.setConfig(config);

    final stream = recognizer.createStream();
    stream.acceptWaveform(
        samples: waveData.samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    print('Result in English: ${result.text}');

    stream.free();
  }

  recognizer.free();
}

View File

@@ -10,7 +10,7 @@ environment:
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Runs ./bin/nemo-canary.dart against the int8 NeMo Canary 180m flash model,
# downloading the model first if it is not present.
# -e: stop at the first failing command; -x: print each command before it runs.
set -ex
# Fetch the Dart package dependencies.
dart pub get
# Download and unpack the model only when the encoder file is missing, then
# remove the archive.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi
# English test wave (en.wav): one run per target language en, de, es and fr.
for tgt_lang in en de es fr; do
dart run \
./bin/nemo-canary.dart \
--encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
--decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
--tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
--src-lang en \
--tgt-lang $tgt_lang \
--input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav
done
# German test wave (de.wav): one run per target language en and de.
for tgt_lang in en de; do
dart run \
./bin/nemo-canary.dart \
--encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
--decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
--tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
--src-lang de \
--tgt-lang $tgt_lang \
--input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav
done

View File

@@ -8,7 +8,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0

View File

@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -10,7 +10,7 @@ environment:
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0

View File

@@ -11,7 +11,7 @@ environment:
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -8,7 +8,7 @@ environment:
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -10,7 +10,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
path: ^1.9.0
args: ^2.5.0

View File

@@ -0,0 +1,44 @@
// Copyright (c) 2025 Xiaomi Corporation
//
// This file shows how to use a NeMo Canary model for speech recognition.
//
// You can find the model doc at
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
using SherpaOnnx;
/// <summary>
/// Example program: decodes the English test wave of the NeMo Canary model
/// twice with a single recognizer, first with English and then with German
/// as the target language.
/// </summary>
class NonStreamingAsrCanary
{
    /// <summary>
    /// Creates a new stream on <paramref name="recognizer"/>, feeds it the
    /// samples of <paramref name="reader"/>, decodes it and returns the text
    /// of the stream's result.
    /// </summary>
    private static string DecodeToText(OfflineRecognizer recognizer, WaveReader reader)
    {
        var stream = recognizer.CreateStream();
        stream.AcceptWaveform(reader.SampleRate, reader.Samples);
        recognizer.Decode(stream);
        return stream.Result.Text;
    }

    static void Main(string[] args)
    {
        // please download model files from
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
        var config = new OfflineRecognizerConfig();
        config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
        config.ModelConfig.Debug = 0;
        config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
        config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
        config.ModelConfig.Canary.SrcLang = "en";
        config.ModelConfig.Canary.TgtLang = "en";

        var recognizer = new OfflineRecognizer(config);

        var testWaveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
        var reader = new WaveReader(testWaveFilename);

        // First pass: the target language is English.
        var english = DecodeToText(recognizer, reader);
        Console.WriteLine("Text (English): {0}", english);

        // Second pass: switch the target language to German on the same
        // recognizer and decode the same samples again.
        config.ModelConfig.Canary.TgtLang = "de";
        recognizer.SetConfig(config);

        var german = DecodeToText(recognizer, reader);
        Console.WriteLine("Text (German): {0}", german);
    }
}

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>non_streaming_canary_decode_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Runs the .NET project in the current directory against the int8 NeMo Canary
# 180m flash model, downloading the model first if it is not present.
# -e: stop at the first failing command; -x: print each command before it runs.
set -ex
# Download and unpack the model only when the encoder file is missing, then
# remove the archive.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi
dotnet run

View File

@@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn",
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -117,6 +119,10 @@ Global
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@@ -5,7 +5,7 @@ description: >
publish_to: 'none'
version: 1.12.4
version: 1.12.5
topics:
- speech-recognition
@@ -31,7 +31,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx

View File

@@ -5,7 +5,7 @@ description: >
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
version: 1.12.4
version: 1.12.5
environment:
sdk: ">=2.17.0 <4.0.0"
@@ -18,7 +18,7 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
sherpa_onnx: ^1.12.4
sherpa_onnx: ^1.12.5
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
url_launcher: 6.2.6

View File

@@ -163,6 +163,44 @@ class OfflineWhisperModelConfig {
final int tailPaddings;
}
/// Immutable configuration of a NeMo Canary offline model.
///
/// Holds the paths of the encoder and decoder models, the source and target
/// languages, and the [usePnc] flag. Every field has a default, so
/// `const OfflineCanaryModelConfig()` describes an unset Canary model.
class OfflineCanaryModelConfig {
  /// Path to the encoder model; defaults to the empty string.
  final String encoder;

  /// Path to the decoder model; defaults to the empty string.
  final String decoder;

  /// Language of the input audio; defaults to `'en'`.
  final String srcLang;

  /// Language of the recognition result; defaults to `'en'`.
  final String tgtLang;

  /// Defaults to `true`.
  // NOTE(review): presumably toggles punctuation and capitalization in the
  // output - confirm against the native API.
  final bool usePnc;

  const OfflineCanaryModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.srcLang = 'en',
    this.tgtLang = 'en',
    this.usePnc = true,
  });

  /// Builds a config from [json]; a key that is absent or maps to `null`
  /// falls back to the same default the constructor uses.
  factory OfflineCanaryModelConfig.fromJson(Map<String, dynamic> json) {
    final encoder = json['encoder'] as String?;
    final decoder = json['decoder'] as String?;
    final srcLang = json['srcLang'] as String?;
    final tgtLang = json['tgtLang'] as String?;
    final usePnc = json['usePnc'] as bool?;

    return OfflineCanaryModelConfig(
      encoder: encoder ?? '',
      decoder: decoder ?? '',
      srcLang: srcLang ?? 'en',
      tgtLang: tgtLang ?? 'en',
      usePnc: usePnc ?? true,
    );
  }

  /// Returns the fields as a map using the keys that [fromJson] reads.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'encoder': encoder,
      'decoder': decoder,
      'srcLang': srcLang,
      'tgtLang': tgtLang,
      'usePnc': usePnc,
    };
  }

  @override
  String toString() =>
      'OfflineCanaryModelConfig(encoder: $encoder, decoder: $decoder, '
      'srcLang: $srcLang, tgtLang: $tgtLang, usePnc: $usePnc)';
}
class OfflineFireRedAsrModelConfig {
const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''});
@@ -310,6 +348,7 @@ class OfflineModelConfig {
this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
this.dolphin = const OfflineDolphinModelConfig(),
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
this.canary = const OfflineCanaryModelConfig(),
required this.tokens,
this.numThreads = 1,
this.debug = true,
@@ -362,6 +401,10 @@ class OfflineModelConfig {
? OfflineZipformerCtcModelConfig.fromJson(
json['zipformerCtc'] as Map<String, dynamic>)
: const OfflineZipformerCtcModelConfig(),
canary: json['canary'] != null
? OfflineCanaryModelConfig.fromJson(
json['canary'] as Map<String, dynamic>)
: const OfflineCanaryModelConfig(),
tokens: json['tokens'] as String,
numThreads: json['numThreads'] as int? ?? 1,
debug: json['debug'] as bool? ?? true,
@@ -375,7 +418,7 @@ class OfflineModelConfig {
@override
String toString() {
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
}
Map<String, dynamic> toJson() => {
@@ -389,6 +432,7 @@ class OfflineModelConfig {
'fireRedAsr': fireRedAsr.toJson(),
'dolphin': dolphin.toJson(),
'zipformerCtc': zipformerCtc.toJson(),
'canary': canary.toJson(),
'tokens': tokens,
'numThreads': numThreads,
'debug': debug,
@@ -409,6 +453,7 @@ class OfflineModelConfig {
final OfflineFireRedAsrModelConfig fireRedAsr;
final OfflineDolphinModelConfig dolphin;
final OfflineZipformerCtcModelConfig zipformerCtc;
final OfflineCanaryModelConfig canary;
final String tokens;
final int numThreads;
@@ -549,7 +594,28 @@ class OfflineRecognizer {
/// The user is responsible to call the OfflineRecognizer.free()
/// method of the returned instance to avoid memory leak.
factory OfflineRecognizer(OfflineRecognizerConfig config) {
  // Convert the Dart config to its native form, create the recognizer from
  // it, and release the temporary native config right after the call.
  final nativeConfig = convertConfig(config);
  final handle =
      SherpaOnnxBindings.createOfflineRecognizer?.call(nativeConfig) ??
          nullptr;
  freeConfig(nativeConfig);

  // `handle` is nullptr when the binding has not been looked up yet.
  return OfflineRecognizer._(ptr: handle, config: config);
}
/// Passes [config] to the native recognizer behind `ptr`.
///
/// The Dart-side `config` field of this object keeps its previous value.
void setConfig(OfflineRecognizerConfig config) {
  final nativeConfig = convertConfig(config);
  SherpaOnnxBindings.offlineRecognizerSetConfig?.call(ptr, nativeConfig);
  // The native config is only needed for the duration of the call.
  freeConfig(nativeConfig);
  // we don't update this.config
}
static Pointer<SherpaOnnxOfflineRecognizerConfig> convertConfig(
OfflineRecognizerConfig config) {
final c = calloc<SherpaOnnxOfflineRecognizerConfig>();
c.ref.feat.sampleRate = config.feat.sampleRate;
@@ -609,6 +675,12 @@ class OfflineRecognizer {
c.ref.model.zipformerCtc.model =
config.model.zipformerCtc.model.toNativeUtf8();
c.ref.model.canary.encoder = config.model.canary.encoder.toNativeUtf8();
c.ref.model.canary.decoder = config.model.canary.decoder.toNativeUtf8();
c.ref.model.canary.srcLang = config.model.canary.srcLang.toNativeUtf8();
c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
c.ref.model.numThreads = config.model.numThreads;
@@ -637,8 +709,10 @@ class OfflineRecognizer {
c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();
final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;
return c;
}
static void freeConfig(Pointer<SherpaOnnxOfflineRecognizerConfig> c) {
calloc.free(c.ref.hr.dictDir);
calloc.free(c.ref.hr.lexicon);
calloc.free(c.ref.hr.ruleFsts);
@@ -653,6 +727,10 @@ class OfflineRecognizer {
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
calloc.free(c.ref.model.canary.tgtLang);
calloc.free(c.ref.model.canary.srcLang);
calloc.free(c.ref.model.canary.decoder);
calloc.free(c.ref.model.canary.encoder);
calloc.free(c.ref.model.zipformerCtc.model);
calloc.free(c.ref.model.dolphin.model);
calloc.free(c.ref.model.fireRedAsr.decoder);
@@ -674,8 +752,6 @@ class OfflineRecognizer {
calloc.free(c.ref.model.transducer.decoder);
calloc.free(c.ref.model.transducer.joiner);
calloc.free(c);
return OfflineRecognizer._(ptr: ptr, config: config);
}
/// The user has to invoke stream.free() on the returned instance

View File

@@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
external int tailPaddings;
}
/// FFI struct carrying the Canary model settings to the native library.
///
/// The string fields are native UTF-8 pointers; `usePnc` is a 32-bit integer
/// that the Dart wrapper sets to 1 or 0.
// NOTE(review): the field order presumably has to mirror the C struct of the
// same name — confirm against the C API header before reordering.
final class SherpaOnnxOfflineCanaryModelConfig extends Struct {
external Pointer<Utf8> encoder;
external Pointer<Utf8> decoder;
external Pointer<Utf8> srcLang;
external Pointer<Utf8> tgtLang;
@Int32()
external int usePnc;
}
final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
external Pointer<Utf8> preprocessor;
external Pointer<Utf8> encoder;
@@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
external SherpaOnnxOfflineDolphinModelConfig dolphin;
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
external SherpaOnnxOfflineCanaryModelConfig canary;
}
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
@@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative;
/// Native (C) signature used to look up `SherpaOnnxOfflineRecognizerSetConfig`:
/// takes a recognizer pointer and a recognizer-config pointer, returns nothing.
typedef OfflineRecognizerSetConfigNative = Void Function(
Pointer<SherpaOnnxOfflineRecognizer>,
Pointer<SherpaOnnxOfflineRecognizerConfig>);
/// Dart-side counterpart of [OfflineRecognizerSetConfigNative].
typedef OfflineRecognizerSetConfig = void Function(
Pointer<SherpaOnnxOfflineRecognizer>,
Pointer<SherpaOnnxOfflineRecognizerConfig>);
typedef DestroyOfflineRecognizerNative = Void Function(
Pointer<SherpaOnnxOfflineRecognizer>);
@@ -1341,6 +1360,7 @@ class SherpaOnnxBindings {
static CreateOfflineRecognizer? createOfflineRecognizer;
static DestroyOfflineRecognizer? destroyOfflineRecognizer;
static OfflineRecognizerSetConfig? offlineRecognizerSetConfig;
static CreateOfflineStream? createOfflineStream;
static DestroyOfflineStream? destroyOfflineStream;
static AcceptWaveformOffline? acceptWaveformOffline;
@@ -1741,6 +1761,11 @@ class SherpaOnnxBindings {
'SherpaOnnxDestroyOfflineRecognizer')
.asFunction();
offlineRecognizerSetConfig ??= dynamicLibrary
.lookup<NativeFunction<OfflineRecognizerSetConfigNative>>(
'SherpaOnnxOfflineRecognizerSetConfig')
.asFunction();
createOfflineStream ??= dynamicLibrary
.lookup<NativeFunction<CreateOfflineStreamNative>>(
'SherpaOnnxCreateOfflineStream')

View File

@@ -17,7 +17,7 @@ topics:
- voice-activity-detection
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.12.4
version: 1.12.5
homepage: https://github.com/k2-fsa/sherpa-onnx
@@ -30,23 +30,23 @@ dependencies:
flutter:
sdk: flutter
sherpa_onnx_android: ^1.12.4
sherpa_onnx_android: ^1.12.5
# sherpa_onnx_android:
# path: ../sherpa_onnx_android
sherpa_onnx_macos: ^1.12.4
sherpa_onnx_macos: ^1.12.5
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos
sherpa_onnx_linux: ^1.12.4
sherpa_onnx_linux: ^1.12.5
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
sherpa_onnx_windows: ^1.12.4
sherpa_onnx_windows: ^1.12.5
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows
sherpa_onnx_ios: ^1.12.4
sherpa_onnx_ios: ^1.12.5
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios

View File

@@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
s.version = '1.12.4'
s.version = '1.12.5'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.

View File

@@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
s.version = '1.12.4'
s.version = '1.12.5'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.

View File

@@ -0,0 +1,17 @@
module non-streaming-canary-decode-files
go 1.17
require (
github.com/k2-fsa/sherpa-onnx-go v1.12.5
github.com/spf13/pflag v1.0.6
github.com/youpy/go-wav v0.3.2
)
require (
github.com/k2-fsa/sherpa-onnx-go-linux v1.12.5 // indirect
github.com/k2-fsa/sherpa-onnx-go-macos v1.12.5 // indirect
github.com/k2-fsa/sherpa-onnx-go-windows v1.12.5 // indirect
github.com/youpy/go-riff v0.1.0 // indirect
github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect
)

View File

@@ -0,0 +1,113 @@
package main
import (
"bytes"
"encoding/binary"
"log"
"os"
"strings"
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
"github.com/youpy/go-wav"
)
// main decodes the same wave file twice with a NeMo Canary model: first with
// the target language set to English, then, after updating the recognizer
// with SetConfig, with the target language set to German.
func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineRecognizerConfig{}

	config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"
	config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"
	config.ModelConfig.Canary.SrcLang = "en"
	config.ModelConfig.Canary.TgtLang = "en"
	config.ModelConfig.Canary.UsePnc = 1
	config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"

	waveFilename := "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

	samples, sampleRate := readWave(waveFilename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOfflineRecognizer(&config)
	// Stop here with a clear message instead of failing later inside the
	// native calls when the recognizer could not be created (for example
	// because a model file is missing).
	if recognizer == nil {
		log.Fatal("Failed to create the recognizer. Please check your config")
	}
	log.Println("Recognizer created!")
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	recognizer.Decode(stream)
	log.Println("Decoding done!")
	result := stream.GetResult()

	log.Println("Text in English: " + strings.ToLower(result.Text))

	// Second pass: a fresh stream over the same samples, decoded after the
	// recognizer has been switched to German output.
	s := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(s)

	s.AcceptWaveform(sampleRate, samples)

	config.ModelConfig.Canary.TgtLang = "de"
	recognizer.SetConfig(&config)

	recognizer.Decode(s)
	result = s.GetResult()

	log.Println("Text in German: " + strings.ToLower(result.Text))
}
// readWave loads a wave file and returns its samples scaled to [-1, 1)
// together with the sample rate.
//
// Only 16-bit, single-channel PCM files are accepted. Any failure (file
// cannot be opened, unsupported format, short read) terminates the program
// via log.Fatalf.
func readWave(filename string) (samples []float32, sampleRate int) {
	file, err := os.Open(filename)
	if err != nil {
		// Report the real cause instead of a misleading format error later on.
		log.Fatalf("Failed to open %v: %v", filename, err)
	}
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format: %v", err)
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		// Include the read error (if any) so a truncated file is diagnosable.
		log.Fatalf("Failed to read %v bytes. Returned %v bytes (error: %v)\n", reader.Size, n, err)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}
// samplesInt16ToFloat converts little-endian 16-bit PCM bytes to float32
// samples by dividing each value by 32768.
//
// A trailing odd byte cannot form a sample and is ignored. The whole buffer
// is decoded with one binary.Read call rather than one reader per sample.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2

	pcm := make([]int16, numSamples)
	src := bytes.NewReader(inSamples[:numSamples*2])
	if err := binary.Read(src, binary.LittleEndian, pcm); err != nil {
		log.Fatal("Failed to parse 16-bit sample")
	}

	outSamples := make([]float32, numSamples)
	for i, s16 := range pcm {
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Downloads the NeMo Canary test model if it is not present yet, then builds
# and runs the Go example in this directory.
# -e: stop at the first failing command; -x: echo each command before running it.
set -ex
# Fetch and unpack the model archive only when the encoder file is missing;
# the archive itself is deleted after extraction.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi
# Resolve module dependencies, build, and run the resulting binary.
go mod tidy
go build
./non-streaming-canary-decode-files

View File

@@ -1,7 +1,7 @@
/**
* Use these variables when you tailor your ArkTS code. They must be of the const type.
*/
export const HAR_VERSION = '1.12.4';
export const HAR_VERSION = '1.12.5';
export const BUILD_MODE_NAME = 'debug';
export const DEBUG = true;
export const TARGET_NAME = 'default';

View File

@@ -7,6 +7,7 @@ export { Samples,
OfflineStream,
FeatureConfig,
HomophoneReplacerConfig,
OfflineCanaryModelConfig,
OfflineDolphinModelConfig,
OfflineTransducerModelConfig,
OfflineParaformerModelConfig,

View File

@@ -23,7 +23,7 @@ or update your `oh-package.json5` to include the following:
```
"dependencies": {
"sherpa_onnx": "1.12.4",
"sherpa_onnx": "1.12.5",
},
```

View File

@@ -1,6 +1,6 @@
{
"name": "sherpa_onnx",
"version": "1.12.4",
"version": "1.12.5",
"description": "On-device speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without Internet connection",
"main": "Index.ets",
"author": "The next-gen Kaldi team",

View File

@@ -93,6 +93,27 @@ static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig(
return c;
}
// Builds the C Canary model config from the `canary` property of `obj`.
//
// The result starts zero-initialized with use_pnc set to 1. If `obj` has no
// `canary` property, or that property is not an object, this default config
// is returned unchanged.
//
// NOTE(review): the SHERPA_ONNX_ASSIGN_ATTR_* macros presumably read from the
// local named `o` and write into the local named `c`, and the string variant
// presumably allocates a copy the caller must release — confirm in the macro
// definitions before renaming either local.
static SherpaOnnxOfflineCanaryModelConfig GetOfflineCanaryModelConfig(
Napi::Object obj) {
SherpaOnnxOfflineCanaryModelConfig c;
memset(&c, 0, sizeof(c));
c.use_pnc = 1; // Align default with JS default
if (!obj.Has("canary") || !obj.Get("canary").IsObject()) {
return c;
}
Napi::Object o = obj.Get("canary").As<Napi::Object>();
// First macro argument: C struct field; second: JS property name.
SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
SHERPA_ONNX_ASSIGN_ATTR_STR(src_lang, srcLang);
SHERPA_ONNX_ASSIGN_ATTR_STR(tgt_lang, tgtLang);
SHERPA_ONNX_ASSIGN_ATTR_INT32(use_pnc, usePnc);
return c;
}
static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig(
Napi::Object obj) {
SherpaOnnxOfflineWhisperModelConfig c;
@@ -203,6 +224,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o);
c.dolphin = GetOfflineDolphinModelConfig(o);
c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
c.canary = GetOfflineCanaryModelConfig(o);
SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -241,6 +263,78 @@ static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) {
return c;
}
// Converts the JS recognizer config object `o` into a C
// SherpaOnnxOfflineRecognizerConfig.
//
// The struct is zero-initialized first; the nested feature, model, LM and
// homophone-replacer configs come from their dedicated helpers and the
// remaining top-level fields are filled in by the macros below.
//
// The returned config should be passed to FreeConfig() once it is no longer
// needed.
// NOTE(review): the macros presumably rely on the locals being named exactly
// `o` and `c` — confirm before renaming.
static SherpaOnnxOfflineRecognizerConfig ParseConfig(Napi::Object o) {
SherpaOnnxOfflineRecognizerConfig c;
memset(&c, 0, sizeof(c));
c.feat_config = GetFeatureConfig(o);
c.model_config = GetOfflineModelConfig(o);
c.lm_config = GetOfflineLMConfig(o);
c.hr = GetHomophoneReplacerConfig(o);
// First macro argument: C struct field; second: JS property name.
SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod);
SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
return c;
}
// Releases the C string fields of a recognizer config, one
// SHERPA_ONNX_DELETE_C_STR call per field.
//
// NOTE(review): these are presumably the strings allocated by the
// SHERPA_ONNX_ASSIGN_ATTR_STR calls in ParseConfig() and its helpers — when a
// new string field is added there, confirm it is also listed here.
static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) {
// Per-model file paths and options.
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);
SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang);
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang);
// Settings shared by all model types.
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);
// Language-model config.
SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);
// Top-level recognizer settings.
SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
// Homophone-replacer config.
SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir);
SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);
}
static Napi::External<SherpaOnnxOfflineRecognizer>
CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
@@ -274,20 +368,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
Napi::Object o = info[0].As<Napi::Object>();
SherpaOnnxOfflineRecognizerConfig c;
memset(&c, 0, sizeof(c));
c.feat_config = GetFeatureConfig(o);
c.model_config = GetOfflineModelConfig(o);
c.lm_config = GetOfflineLMConfig(o);
c.hr = GetHomophoneReplacerConfig(o);
SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod);
SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);
#if __OHOS__
std::unique_ptr<NativeResourceManager,
@@ -302,51 +383,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
SherpaOnnxCreateOfflineRecognizer(&c);
#endif
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);
SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.encoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);
SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);
SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir);
SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);
FreeConfig(c);
if (!recognizer) {
Napi::TypeError::New(env, "Please check your config!")
@@ -470,6 +507,43 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) {
#endif
}
// Node-API binding for SherpaOnnxOfflineRecognizerSetConfig().
//
// Expects exactly two arguments:
//   info[0] - the external holding the offline recognizer pointer
//   info[1] - the recognizer config as a JS object
//
// A JavaScript TypeError is thrown when the argument count or either argument
// type does not match. Otherwise the config is parsed, applied to the
// recognizer, and released again before returning.
static void OfflineRecognizerSetConfigWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const size_t num_args = info.Length();
  if (num_args != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << num_args;

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return;
  }

  const bool has_recognizer = info[0].IsExternal();
  if (!has_recognizer) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an offline recognizer pointer.")
        .ThrowAsJavaScriptException();
    return;
  }

  const bool has_config = info[1].IsObject();
  if (!has_config) {
    Napi::TypeError::New(env, "Expect an object as the second argument")
        .ThrowAsJavaScriptException();
    return;
  }

  const SherpaOnnxOfflineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();

  // The parsed config is only needed for the duration of the call.
  SherpaOnnxOfflineRecognizerConfig config =
      ParseConfig(info[1].As<Napi::Object>());
  SherpaOnnxOfflineRecognizerSetConfig(recognizer, &config);
  FreeConfig(config);
}
static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
if (info.Length() != 2) {
@@ -548,6 +622,9 @@ void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) {
exports.Set(Napi::String::New(env, "decodeOfflineStream"),
Napi::Function::New(env, DecodeOfflineStreamWrapper));
exports.Set(Napi::String::New(env, "offlineRecognizerSetConfig"),
Napi::Function::New(env, OfflineRecognizerSetConfigWrapper));
exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"),
Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper));
}

View File

@@ -22,6 +22,7 @@ export const voiceActivityDetectorFlush: (handle: object) => void;
export const createOfflineRecognizer: (config: object, mgr?: object) => object;
export const createOfflineStream: (handle: object) => object;
export const offlineRecognizerSetConfig: (handle: object, config: object) => void;
export const acceptWaveformOffline: (handle: object, audio: object) => void;
export const decodeOfflineStream: (handle: object, streamHandle: object) => void;
export const getOfflineStreamResultAsJson: (streamHandle: object) => string;

View File

@@ -4,6 +4,7 @@ import {
createOfflineStream,
decodeOfflineStream,
getOfflineStreamResultAsJson,
offlineRecognizerSetConfig,
} from 'libsherpa_onnx.so';
export interface Samples {
@@ -67,6 +68,14 @@ export class OfflineWhisperModelConfig {
public tailPaddings: number = -1;
}
// Settings for a non-streaming NeMo Canary model. The native binding reads
// these properties by name from the `canary` entry of the model config.
export class OfflineCanaryModelConfig {
// Path to the encoder model file.
public encoder: string = '';
// Path to the decoder model file.
public decoder: string = '';
// Language code of the input audio; empty by default.
public srcLang: string = '';
// Language code of the text to produce; empty by default.
public tgtLang: string = '';
// Integer flag, 1 by default; the native side uses the same default.
public usePnc: number = 1;
}
export class OfflineTdnnModelConfig {
public model: string = '';
}
@@ -102,6 +111,7 @@ export class OfflineModelConfig {
public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig();
public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig();
}
export class OfflineLMConfig {
@@ -151,6 +161,10 @@ export class OfflineRecognizer {
this.config = config
}
// Forwards `config` to the native recognizer behind `this.handle`.
// `this.config` is not modified by this call.
setConfig(config: OfflineRecognizerConfig) {
offlineRecognizerSetConfig(this.handle, config);
}
createStream(): OfflineStream {
const handle: object = createOfflineStream(this.handle);
return new OfflineStream(handle);

View File

@@ -6,7 +6,7 @@
"author": "",
"license": "",
"dependencies": {
"sherpa_onnx": "1.12.4"
"sherpa_onnx": "1.12.5"
}
}

View File

@@ -6,7 +6,7 @@
"author": "",
"license": "",
"dependencies": {
"sherpa_onnx": "1.12.4",
"sherpa_onnx": "1.12.5",
}
}

View File

@@ -6,7 +6,7 @@
"author": "",
"license": "",
"dependencies": {
"sherpa_onnx": "1.12.4",
"sherpa_onnx": "1.12.5",
}
}

View File

@@ -6,7 +6,7 @@
"author": "",
"license": "",
"dependencies": {
"sherpa_onnx": "1.12.4",
"sherpa_onnx": "1.12.5",
}
}

View File

@@ -1,6 +1,6 @@
# Introduction
Please download ./sherpa_onnx-v1.12.4.har
Please download ./sherpa_onnx-v1.12.5.har
from <https://huggingface.co/csukuangfj/sherpa-onnx-harmony-os/tree/main/har>
Hint: For users who have no access to huggingface, please use

View File

@@ -7,7 +7,7 @@
"license": "",
"dependencies": {
// please see https://ohpm.openharmony.cn/#/cn/detail/sherpa_onnx
"sherpa_onnx": "1.12.4",
"sherpa_onnx": "1.12.5",
}
}

View File

@@ -0,0 +1,56 @@
// Copyright 2024 Xiaomi Corporation
// This file shows how to use an offline NeMo Canary model, i.e.,
// non-streaming NeMo Canary model, to decode files.
import com.k2fsa.sherpa.onnx.*;
/**
 * Example: decode a single wave file with a non-streaming NeMo Canary model
 * and print the recognized English text.
 */
public class NonStreamingDecodeFileNemoCanary {
  public static void main(String[] args) {
    // Model files can be downloaded by following
    // https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
    final String encoder =
        "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
    final String decoder =
        "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
    final String tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
    final String waveFilename =
        "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";

    final WaveReader reader = new WaveReader(waveFilename);

    // English audio in, English text out, decoded with greedy search.
    final OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(
                OfflineModelConfig.builder()
                    .setCanary(
                        OfflineCanaryModelConfig.builder()
                            .setEncoder(encoder)
                            .setDecoder(decoder)
                            .setSrcLang("en")
                            .setTgtLang("en")
                            .setUsePnc(true)
                            .build())
                    .setTokens(tokens)
                    .setNumThreads(1)
                    .setDebug(true)
                    .build())
            .setDecodingMethod("greedy_search")
            .build();

    final OfflineRecognizer recognizer = new OfflineRecognizer(config);
    final OfflineStream stream = recognizer.createStream();

    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
    recognizer.decode(stream);

    System.out.printf(
        "filename:%s\nresult(English):%s\n",
        waveFilename, recognizer.getResult(stream).getText());

    // Release the stream before the recognizer that created it.
    stream.release();
    recognizer.release();
  }
}

View File

@@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx.
```bash
./run-non-streaming-decode-file-dolphin-ctc.sh
./run-non-streaming-decode-file-fire-red-asr.sh
./run-non-streaming-decode-file-moonshine.sh
./run-non-streaming-decode-file-nemo-canary.sh
./run-non-streaming-decode-file-nemo.sh
./run-non-streaming-decode-file-paraformer.sh
./run-non-streaming-decode-file-sense-voice.sh
./run-non-streaming-decode-file-tele-speech-ctc.sh
./run-non-streaming-decode-file-transducer-hotwords.sh
./run-non-streaming-decode-file-transducer.sh
./run-non-streaming-decode-file-whisper-multiple.sh
./run-non-streaming-decode-file-whisper.sh
./run-non-streaming-decode-file-nemo.sh
./run-non-streaming-decode-file-zipformer-ctc.sh
```
## Non-Streaming Speech recognition with homophone replacer

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Runs the NonStreamingDecodeFileNemoCanary Java example. Builds the JNI
# library and the Java API jar when they are missing, and downloads the test
# model on first use.
# -e: stop at the first failing command; -x: echo each command before running it.
set -ex
# Build the JNI shared library unless a .dylib or .so already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
mkdir -p ../build
pushd ../build
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
popd
fi
# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
pushd ../sherpa-onnx/java-api
make
popd
fi
# Fetch and unpack the model archive only when the encoder file is missing;
# the archive itself is deleted after extraction.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi
# Run the example straight from its source file, with the JNI library
# directory on java.library.path and the API jar on the class path.
java \
-Djava.library.path=$PWD/../build/lib \
-cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
NonStreamingDecodeFileNemoCanary.java

View File

@@ -2,8 +2,8 @@ jdk:
- openjdk17
before_install:
- wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-1.12.4.aar
- wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-1.12.5.aar
install:
- FILE="-Dfile=sherpa-onnx-1.12.4.aar"
- mvn install:install-file $FILE -DgroupId=com.k2fsa.sherpa.onnx -DartifactId=sherpa-onnx -Dversion=1.12.4 -Dpackaging=aar -DgeneratePom=true
- FILE="-Dfile=sherpa-onnx-1.12.5.aar"
- mvn install:install-file $FILE -DgroupId=com.k2fsa.sherpa.onnx -DartifactId=sherpa-onnx -Dversion=1.12.5 -Dpackaging=aar -DgeneratePom=true

View File

@@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() {
ls -lh $out_filename
java -Djava.library.path=../build/lib -jar $out_filename
}
testVersion
# Compiles and runs the Kotlin NeMo Canary example, downloading the test
# model first when it is not present.
function testOfflineNeMoCanary() {
# Fetch and unpack the model archive only when the encoder file is missing.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi
out_filename=test_offline_nemo_canary.jar
# Bundle the test together with the API sources it uses into one runnable jar.
kotlinc-jvm -include-runtime -d $out_filename \
test_offline_nemo_canary.kt \
FeatureConfig.kt \
HomophoneReplacerConfig.kt \
OfflineRecognizer.kt \
OfflineStream.kt \
WaveReader.kt \
faked-asset-manager.kt
ls -lh $out_filename
# The JNI library is loaded from ../build/lib.
java -Djava.library.path=../build/lib -jar $out_filename
}
# testVersion
testOfflineNeMoCanary
testOfflineSenseVoiceWithHr
testOfflineSpeechDenoiser
testOfflineSpeakerDiarization

View File

@@ -0,0 +1,48 @@
package com.k2fsa.sherpa.onnx
/**
 * Decodes the same wave file twice with a NeMo Canary model: first with the
 * recognizer's initial config, then again after switching the target language
 * to German via setConfig.
 */
fun main() {
    val recognizer = createOfflineRecognizer()

    val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

    // readWaveFromFile returns the samples at index 0 and the sample rate at index 1.
    val wave = WaveReader.readWaveFromFile(filename = waveFilename)
    val samples = wave[0] as FloatArray
    val sampleRate = wave[1] as Int

    // First pass.
    recognizer.createStream().let { stream ->
        stream.acceptWaveform(samples, sampleRate = sampleRate)
        recognizer.decode(stream)
        val result = recognizer.getResult(stream)
        println("English: $result")
        stream.release()
    }

    // Second pass: copy the current config with only tgtLang changed to "de".
    val germanCanary = recognizer.config.modelConfig.canary.copy(tgtLang = "de")
    val germanModel = recognizer.config.modelConfig.copy(canary = germanCanary)
    recognizer.setConfig(recognizer.config.copy(modelConfig = germanModel))

    recognizer.createStream().let { stream ->
        stream.acceptWaveform(samples, sampleRate = sampleRate)
        recognizer.decode(stream)
        val result = recognizer.getResult(stream)
        println("German: $result")
        stream.release()
    }

    recognizer.release()
}
/**
 * Creates the recognizer used by this example from the predefined offline
 * model config number 32.
 */
// NOTE(review): type 32 is presumably the NeMo Canary entry of
// getOfflineModelConfig — confirm against its definition.
fun createOfflineRecognizer(): OfflineRecognizer =
    OfflineRecognizer(
        config = OfflineRecognizerConfig(
            modelConfig = getOfflineModelConfig(type = 32)!!,
        ),
    )

View File

@@ -5,9 +5,9 @@ for speech recognition.
|Directory| Pre-built exe (x64)|Pre-built exe (x86)| Description|
|---------|--------------------|-------------------|------------|
|[./NonStreamingSpeechRecognition](./NonStreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-asr-x64-v1.12.4.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-asr-x86-v1.12.4.exe)| Non-streaming speech recognition|
|[./StreamingSpeechRecognition](./StreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-streaming-asr-x64-v1.12.4.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-streaming-asr-x86-v1.12.4.exe)| Streaming speech recognition|
|[./NonStreamingTextToSpeech](./NonStreamingTextToSpeech)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-tts-x64-v1.12.4.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.4/sherpa-onnx-non-streaming-tts-x86-v1.12.4.exe)| Non-streaming text to speech|
|[./NonStreamingSpeechRecognition](./NonStreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-asr-x64-v1.12.5.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-asr-x86-v1.12.5.exe)| Non-streaming speech recognition|
|[./StreamingSpeechRecognition](./StreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-streaming-asr-x64-v1.12.5.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-streaming-asr-x86-v1.12.5.exe)| Streaming speech recognition|
|[./NonStreamingTextToSpeech](./NonStreamingTextToSpeech)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-tts-x64-v1.12.5.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.5/sherpa-onnx-non-streaming-tts-x86-v1.12.5.exe)| Non-streaming text to speech|
Caution: You need to use Windows and install Visual Studio 2022 in order to
compile it.

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env bash
set -ex
old_version="1\.12\.3"
new_version="1\.12\.4"
old_version="1\.12\.4"
new_version="1\.12\.5"
replace_str="s/$old_version/$new_version/g"
sed -i.bak "$replace_str" ./sherpa-onnx/csrc/version.cc

View File

@@ -123,6 +123,7 @@ The following tables list the examples in this folder.
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
|[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
|[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search|
@@ -389,6 +390,16 @@ npm install naudiodon2
node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js
```
### Non-streaming speech recognition with NeMo Canary models
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
node ./test_asr_non_streaming_nemo_canary.js
```
### Non-streaming speech recognition with NeMo CTC models
```bash

View File

@@ -1,5 +1,5 @@
{
"dependencies": {
"sherpa-onnx-node": "^1.12.4"
"sherpa-onnx-node": "^1.12.5"
}
}

View File

@@ -0,0 +1,62 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const modelDir = './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8';
const waveFilename = `${modelDir}/test_wavs/en.wav`;

// Canary settings for the first pass: source and target language are both 'en'.
const canary = {
  encoder: `${modelDir}/encoder.int8.onnx`,
  decoder: `${modelDir}/decoder.int8.onnx`,
  srcLang: 'en',
  tgtLang: 'en',
  usePnc: 1,
};

const recognizer = new sherpa_onnx.OfflineRecognizer({
  featConfig: {sampleRate: 16000, featureDim: 80},
  modelConfig: {
    canary,
    tokens: `${modelDir}/tokens.txt`,
    numThreads: 2,
    provider: 'cpu',
    debug: 0,
  },
});

// ---- First pass (timed). Reading the wave file is part of the timed region.
console.log('Started');
const startedAt = Date.now();

let stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
recognizer.decode(stream);
const englishResult = recognizer.getResult(stream);

const finishedAt = Date.now();
console.log('Done');

// Real-time factor = processing time / audio duration.
const elapsedSeconds = (finishedAt - startedAt) / 1000;
const durationSeconds = wave.samples.length / wave.sampleRate;
const rtf = elapsedSeconds / durationSeconds;

console.log('Wave duration', durationSeconds.toFixed(3), 'seconds');
console.log('Elapsed', elapsedSeconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsedSeconds.toFixed(3)}/${durationSeconds.toFixed(3)} =`,
    rtf.toFixed(3));
console.log(waveFilename);
console.log('result (English)\n', englishResult);

// ---- Second pass: same audio, target language changed to German before
// decoding by mutating the recognizer's config and re-applying it.
stream = recognizer.createStream();
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const updatedConfig = recognizer.config;
updatedConfig.modelConfig.canary.tgtLang = 'de';
recognizer.setConfig(updatedConfig);

recognizer.decode(stream);
const germanResult = recognizer.getResult(stream);
console.log('result (German)\n', germanResult);

View File

@@ -63,7 +63,7 @@ for text-to-speech.
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2
@@ -154,6 +154,22 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
node ./test-offline-dolphin-ctc.js
```
## ./test-offline-nemo-canary.js
[./test-offline-nemo-canary.js](./test-offline-nemo-canary.js) demonstrates
how to decode a file with a NeMo Canary model. In the code we use
[sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french).
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
node ./test-offline-nemo-canary.js
```
## ./test-offline-zipformer-ctc.js
[./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates

View File

@@ -2,7 +2,7 @@
"dependencies": {
"mic": "^2.1.2",
"naudiodon2": "^2.4.0",
"sherpa-onnx": "^1.12.4",
"sherpa-onnx": "^1.12.5",
"wav": "^1.0.2"
}
}

View File

@@ -0,0 +1,56 @@
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
/**
 * Creates a non-streaming recognizer backed by the int8 NeMo Canary model
 * found in ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.
 *
 * The initial configuration uses 'en' for both source and target language.
 *
 * @returns the recognizer returned by sherpa_onnx.createOfflineRecognizer
 */
function createOfflineRecognizer() {
  const modelDir = './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8';

  const canary = {
    encoder: `${modelDir}/encoder.int8.onnx`,
    decoder: `${modelDir}/decoder.int8.onnx`,
    srcLang: 'en',
    tgtLang: 'en',
    usePnc: 1,
  };

  return sherpa_onnx.createOfflineRecognizer({
    modelConfig: {
      canary,
      debug: 0,
      tokens: `${modelDir}/tokens.txt`,
    },
  });
}
const recognizer = createOfflineRecognizer();
let stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
const wave = sherpa_onnx.readWave(waveFilename);

// Feeds the wave into `activeStream`, decodes it, prints the recognized text
// under the given language label and then frees the stream.
function decodeAndPrint(activeStream, language) {
  activeStream.acceptWaveform(wave.sampleRate, wave.samples);
  recognizer.decode(activeStream);
  const {text} = recognizer.getResult(activeStream);
  console.log(`text in ${language}: ${text}`);
  activeStream.free();
}

decodeAndPrint(stream, 'English');

// now output German text: change the target language on the recognizer's
// config, re-apply it, and decode the same audio on a fresh stream
const germanConfig = recognizer.config;
germanConfig.modelConfig.canary.tgtLang = 'de';
recognizer.setConfig(germanConfig);

stream = recognizer.createStream();
decodeAndPrint(stream, 'German');

recognizer.free();

View File

@@ -10,3 +10,4 @@ telespeech_ctc
moonshine
dolphin_ctc
zipformer_ctc
nemo_canary

View File

@@ -0,0 +1,107 @@
{ Copyright (c) 2025 Xiaomi Corporation }
{
This file shows how to use a non-streaming NeMo Canary model
to decode files.
You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program nemo_canary;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

const
  { Directory holding the extracted model files and the test wave. }
  ModelDir = './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8';

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  Outcome: TSherpaOnnxOfflineRecognizerResult;
  StartedAt: TDateTime;
  FinishedAt: TDateTime;

{ Prints one recognition result followed by the thread count, the elapsed
  time, the wave duration and the real-time factor (elapsed / duration).
  Reads the global Wave and Config. }
procedure Report(Res: TSherpaOnnxOfflineRecognizerResult;
  T0: TDateTime; T1: TDateTime);
var
  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Elapsed := MilliSecondsBetween(T1, T0) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(Res.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
end;

begin
  Initialize(Config);

  { First pass: source and target language are both English. }
  Config.ModelConfig.Canary.Encoder := ModelDir + '/encoder.int8.onnx';
  Config.ModelConfig.Canary.Decoder := ModelDir + '/decoder.int8.onnx';
  Config.ModelConfig.Canary.SrcLang := 'en';
  Config.ModelConfig.Canary.TgtLang := 'en';
  Config.ModelConfig.Canary.UsePnc := True;
  Config.ModelConfig.Tokens := ModelDir + '/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Wave := SherpaOnnxReadWave(ModelDir + '/test_wavs/en.wav');
  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);

  Stream := Recognizer.CreateStream();
  StartedAt := Now;
  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);
  Outcome := Recognizer.GetResult(Stream);
  FinishedAt := Now;

  Report(Outcome, StartedAt, FinishedAt);
  FreeAndNil(Stream);

  WriteLn('-----------Output German-----');

  { Second pass: same audio, but the target language is switched to German.
    The timed region includes the SetConfig call. }
  Stream := Recognizer.CreateStream();
  StartedAt := Now;
  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Config.ModelConfig.Canary.TgtLang := 'de';
  Recognizer.SetConfig(Config);
  Recognizer.Decode(Stream);
  Outcome := Recognizer.GetResult(Stream);
  FinishedAt := Now;

  Report(Outcome, StartedAt, FinishedAt);

  { Release the stream and the recognizer explicitly. A short script like
    this one would get away without it, but larger programs must free
    them to avoid leaking memory. }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Builds the shared sherpa-onnx C API (only if no installed library is found),
# downloads the NeMo Canary test model (only if missing), compiles
# nemo_canary.pas with fpc and runs the resulting binary.
#
# All paths are resolved from this script's own location, so the script can
# be launched from any working directory.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The model archive, nemo_canary.pas and the compiled binary all live next to
# this script; work from there so the relative paths below always resolve.
cd "$SCRIPT_DIR"

# Build and install the C API unless one of the platform-specific shared
# libraries (macOS / Linux / Windows) is already installed.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Fetch and unpack the model once; the encoder file doubles as the "already
# downloaded" marker.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./nemo_canary.pas

# Prepend the install dir to the loader search paths. The ${VAR:+:$VAR} form
# avoids a trailing ':' (an empty entry, i.e. the current directory) when the
# variable was previously unset or empty.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./nemo_canary

View File

@@ -4,7 +4,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.k2fsa.sherpa.onnx</groupId>
<artifactId>sherpa-onnx-android</artifactId>
<version>1.12.4</version>
<version>1.12.5</version>
<url>https://github.com/k2-fsa/sherpa-onnx</url>
<packaging>pom</packaging>
<description>First Android Library</description>

View File

@@ -35,6 +35,25 @@ file(s) with a non-streaming model.
/path/to/0.wav \
/path/to/1.wav
also with RNN LM rescoring and LODR (optional):
./python-api-examples/offline-decode-files.py \
--tokens=/path/to/tokens.txt \
--encoder=/path/to/encoder.onnx \
--decoder=/path/to/decoder.onnx \
--joiner=/path/to/joiner.onnx \
--num-threads=2 \
--decoding-method=modified_beam_search \
--debug=false \
--sample-rate=16000 \
--feature-dim=80 \
--lm=/path/to/lm.onnx \
--lm-scale=0.1 \
--lodr-fst=/path/to/lodr.fst \
--lodr-scale=-0.1 \
/path/to/0.wav \
/path/to/1.wav
(3) For CTC models from NeMo
python3 ./python-api-examples/offline-decode-files.py \
@@ -269,6 +288,39 @@ def get_args():
default="greedy_search",
help="Valid values are greedy_search and modified_beam_search",
)
parser.add_argument(
"--lm",
metavar="file",
type=str,
default="",
help="Path to RNN LM model",
)
parser.add_argument(
"--lm-scale",
metavar="lm_scale",
type=float,
default=0.1,
help="LM model scale for rescoring",
)
parser.add_argument(
"--lodr-fst",
metavar="file",
type=str,
default="",
help="Path to LODR FST model. Used only when --lm is given.",
)
parser.add_argument(
"--lodr-scale",
metavar="lodr_scale",
type=float,
default=-0.1,
help="LODR scale for rescoring.Used only when --lodr_fst is given.",
)
parser.add_argument(
"--debug",
type=bool,
@@ -364,6 +416,10 @@ def main():
num_threads=args.num_threads,
sample_rate=args.sample_rate,
feature_dim=args.feature_dim,
lm=args.lm,
lm_scale=args.lm_scale,
lodr_fst=args.lodr_fst,
lodr_scale=args.lodr_scale,
decoding_method=args.decoding_method,
hotwords_file=args.hotwords_file,
hotwords_score=args.hotwords_score,

View File

@@ -21,6 +21,22 @@ rm sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav
or with RNN LM rescoring and LODR:
./python-api-examples/online-decode-files.py \
--tokens=./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
--encoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
--decoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
--joiner=./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
--decoding-method=modified_beam_search \
--lm=/path/to/lm.onnx \
--lm-scale=0.1 \
--lodr-fst=/path/to/lodr.fst \
--lodr-scale=-0.1 \
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav \
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav
(2) Streaming paraformer
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
@@ -186,6 +202,22 @@ def get_args():
""",
)
parser.add_argument(
"--lodr-fst",
metavar="file",
type=str,
default="",
help="Path to LODR FST model. Used only when --lm is given.",
)
parser.add_argument(
"--lodr-scale",
metavar="lodr_scale",
type=float,
default=-0.1,
help="LODR scale for rescoring.Used only when --lodr_fst is given.",
)
parser.add_argument(
"--provider",
type=str,
@@ -320,6 +352,8 @@ def main():
max_active_paths=args.max_active_paths,
lm=args.lm,
lm_scale=args.lm_scale,
lodr_fst=args.lodr_fst,
lodr_scale=args.lodr_scale,
hotwords_file=args.hotwords_file,
hotwords_score=args.hotwords_score,
modeling_unit=args.modeling_unit,

View File

@@ -565,6 +565,38 @@ def get_models():
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8",
idx=33,
lang="en",
lang2="English",
short_name="parakeet_tdt_ctc_110m",
cmd="""
pushd $model_name
rm -rfv test_wavs
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8",
idx=34,
lang="ja",
lang2="Japanese",
short_name="parakeet-tdt_ctc_0.6b_ja",
cmd="""
pushd $model_name
rm -rfv test_wavs
ls -lh
popd
""",
),

View File

@@ -0,0 +1,32 @@
/// Copyright (c) 2024.5 by 东风破
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Settings for a non-streaming Canary model. The struct uses sequential
/// layout and every string field is marshalled as an ANSI C string, so the
/// order of the field declarations below must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineCanaryModelConfig
{
    /// <summary>Encoder model; empty by default.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// <summary>Decoder model; empty by default.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Decoder;

    /// <summary>Source language code; defaults to "en".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string SrcLang;

    /// <summary>Target language code; defaults to "en".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string TgtLang;

    /// <summary>
    /// Integer flag, 1 by default.
    /// NOTE(review): presumably toggles punctuation and capitalization in the
    /// output — confirm against the native API.
    /// </summary>
    public int UsePnc;

    /// <summary>
    /// Creates a configuration with empty model paths, "en" for both
    /// languages and <see cref="UsePnc"/> set to 1.
    /// </summary>
    public OfflineCanaryModelConfig()
    {
        UsePnc = 1;
        TgtLang = "en";
        SrcLang = "en";
        Decoder = string.Empty;
        Encoder = string.Empty;
    }
}
}

View File

@@ -28,6 +28,7 @@ namespace SherpaOnnx
FireRedAsr = new OfflineFireRedAsrModelConfig();
Dolphin = new OfflineDolphinModelConfig();
ZipformerCtc = new OfflineZipformerCtcModelConfig();
Canary = new OfflineCanaryModelConfig();
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
@@ -62,5 +63,6 @@ namespace SherpaOnnx
public OfflineFireRedAsrModelConfig FireRedAsr;
public OfflineDolphinModelConfig Dolphin;
public OfflineZipformerCtcModelConfig ZipformerCtc;
public OfflineCanaryModelConfig Canary;
}
}

View File

@@ -14,6 +14,11 @@ namespace SherpaOnnx
_handle = new HandleRef(this, h);
}
/// <summary>
/// Passes <paramref name="config"/> by reference, together with this
/// recognizer's native handle, to SherpaOnnxOfflineRecognizerSetConfig.
/// </summary>
/// <param name="config">The configuration handed to the native call.</param>
public void SetConfig(OfflineRecognizerConfig config) =>
    SherpaOnnxOfflineRecognizerSetConfig(_handle.Handle, ref config);
public OfflineStream CreateStream()
{
IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle);
@@ -65,6 +70,9 @@ namespace SherpaOnnx
[DllImport(Dll.Filename)]
private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config);
// P/Invoke declaration for the native function of the same name in the
// library identified by Dll.Filename. Called by SetConfig with the
// recognizer's handle and the configuration passed by reference.
[DllImport(Dll.Filename)]
private static extern void SherpaOnnxOfflineRecognizerSetConfig(IntPtr handle, ref OfflineRecognizerConfig config);
[DllImport(Dll.Filename)]
private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle);

Some files were not shown because too many files have changed in this diff Show More