diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml index bbabfb60..c078630b 100644 --- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml +++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml @@ -61,6 +61,11 @@ jobs: sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000 + sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8 + sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8 + sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8 + sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 + sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8 ) for m in ${models[@]}; do @@ -89,6 +94,11 @@ jobs: sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000 + sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8 + sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8 + sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8 + sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 + sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8 ) for d in ${dirs[@]}; do tar cjvf ${d}.tar.bz2 ./$d diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc.yaml index 8c40a558..617b0765 100644 --- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc.yaml +++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc.yaml @@ -54,13 +54,18 @@ jobs: curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt popd - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms - - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms + names=( + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8 + ) + for d in ${names[@]}; do + cp -av test_wavs $d/ + tar cjvf $d.tar.bz2 $d + done - name: Release uses: svenstaro/upload-release-action@v2 @@ -71,3 +76,41 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: asr-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + models=( + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8 + ) + + for m in ${models[@]}; do + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface + cp -av $m/* huggingface + cd huggingface + git lfs track "*.onnx" + git lfs track "*.wav" + git status + git add . + git status + git commit -m "first commit" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main + cd .. + done diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml index 4a7e2339..d119358a 100644 --- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml +++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml @@ -61,6 +61,11 @@ jobs: sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000 + sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8 + sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8 + sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8 + sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 + sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8 ) for m in ${models[@]}; do @@ -88,6 +93,11 @@ jobs: sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000 + sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8 + sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8 + sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8 + sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 + sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8 ) for d in ${dirs[@]}; do tar cjvf ${d}.tar.bz2 ./$d diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer.yaml index 477de451..78071904 100644 --- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer.yaml +++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer.yaml @@ -54,13 +54,18 @@ jobs: curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt popd - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms - - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms + models=( + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8 + ) + for m in ${models[@]}; do + cp -av test_wavs $m + tar cjvf $m.tar.bz2 $m + done - name: Release uses: svenstaro/upload-release-action@v2 @@ -71,3 +76,41 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: asr-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + models=( + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8 + ) + + for m in ${models[@]}; do + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface + cp -av $m/* huggingface + cd huggingface + git lfs track "*.onnx" + git lfs track "*.wav" + git status + git add . + git status + git commit -m "first commit" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main + cd .. + done diff --git a/.github/workflows/export-nemo-parakeet-tdt.yaml b/.github/workflows/export-nemo-parakeet-tdt.yaml new file mode 100644 index 00000000..afd43965 --- /dev/null +++ b/.github/workflows/export-nemo-parakeet-tdt.yaml @@ -0,0 +1,105 @@ +name: export-nemo-parakeet-tdt + +on: + push: + branches: + - refactor-export-nemo + workflow_dispatch: + +concurrency: + group: export-nemo-parakeet-tdt-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-nemo-parakeet-tdt-0_6b-v2: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: parakeet tdt + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies + shell: bash + run: | + pip install \ + nemo_toolkit['asr'] \ + "numpy<2" \ + ipython \ + kaldi-native-fbank \ + librosa \ + onnx==1.17.0 \ + onnxmltools==1.13.0 \ + onnxruntime==1.17.1 \ + soundfile + + - name: Run + shell: bash + run: | + cd scripts/nemo/parakeet-tdt_ctc-0.6b-ja + ./run-ctc.sh + + - name: Collect files + shell: bash + run: | + models=( + sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8 + ) + for m in ${models[@]}; do + mv -v scripts/nemo/parakeet-tdt_ctc-0.6b-ja/$m . + tar cjfv $m.tar.bz2 $m + done + + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + models=( + sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8 + ) + + for m in ${models[@]}; do + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface + cp -av $m/* huggingface + cd huggingface + git lfs track "*.onnx" + git lfs track "*.wav" + git status + git add . + git status + git commit -m "first commit" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main + cd .. + done + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: asr-models diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index 67079797..1c442366 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -565,6 +565,38 @@ def get_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8", + idx=33, + lang="en", + lang2="English", + short_name="parakeet_tdt_ctc_110m", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + + ls -lh + + popd + """, + ), + Model( + model_name="sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8", + idx=34, + lang="ja", + lang2="Japanese", + short_name="parakeet-tdt_ctc_0.6b_ja", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + + ls -lh + popd """, ), diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/README.md b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/README.md index d156d7e3..22d6fd78 100644 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/README.md +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/README.md @@ -23,5 +23,6 @@ This folder contains scripts for exporting models from - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m + - https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja to `sherpa-onnx`. diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc-non-streaming.py b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc-non-streaming.py index fe16d074..35fb94b4 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc-non-streaming.py +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc-non-streaming.py @@ -6,6 +6,7 @@ from typing import Dict import nemo.collections.asr as nemo_asr import onnx import torch +from onnxruntime.quantization import QuantType, quantize_dynamic def get_args(): @@ -86,6 +87,12 @@ def main(): } add_meta_data(filename, meta_data) + quantize_dynamic( + model_input="./model.onnx", + model_output="./model.int8.onnx", + weight_type=QuantType.QUInt8, + ) + print("preprocessor", asr_model.cfg.preprocessor) print(meta_data) diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc.py b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc.py index 622e1059..ff9bddc3 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc.py +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc.py @@ -6,6 +6,7 @@ from typing import Dict import nemo.collections.asr as nemo_asr import onnx import torch +from onnxruntime.quantization import QuantType, quantize_dynamic def get_args(): @@ -114,6 +115,11 @@ def main(): "comment": "Only the CTC branch is exported", } add_meta_data(filename, meta_data) + quantize_dynamic( + model_input="./model.onnx", + model_output="./model.int8.onnx", + weight_type=QuantType.QUInt8, + ) print(meta_data) diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer-non-streaming.py b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer-non-streaming.py index 0f00e776..6be3906a 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer-non-streaming.py +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer-non-streaming.py @@ -6,6 +6,7 @@ from typing import Dict import nemo.collections.asr as nemo_asr import onnx import torch +from onnxruntime.quantization import QuantType, quantize_dynamic def get_args(): @@ -90,6 +91,13 @@ def main(): } add_meta_data("encoder.onnx", meta_data) + for m in ["encoder", "decoder", "joiner"]: + quantize_dynamic( + model_input=f"{m}.onnx", + model_output=f"{m}.int8.onnx", + weight_type=QuantType.QUInt8, + ) + print(meta_data) diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py index 0b6ac3f1..81136f4b 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py @@ -6,6 +6,7 @@ from typing import Dict import nemo.collections.asr as nemo_asr import onnx import torch +from onnxruntime.quantization import QuantType, quantize_dynamic def get_args(): @@ -122,6 +123,13 @@ def main(): } add_meta_data("encoder.onnx", meta_data) + for m in ["encoder", "decoder", "joiner"]: + quantize_dynamic( + model_input=f"{m}.onnx", + model_output=f"{m}.int8.onnx", + weight_type=QuantType.QUInt8, + ) + print(meta_data) diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc-non-streaming.sh b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc-non-streaming.sh index 335f6449..92da9571 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc-non-streaming.sh +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc-non-streaming.sh @@ -19,6 +19,12 @@ log "Process $name at $url" d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000 mkdir -p $d mv -v model.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8 +mkdir -p $d +mv -v model.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -33,6 +39,12 @@ log "Process $name at $url" d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500 mkdir -p $d mv -v model.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8 +mkdir -p $d +mv -v model.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -45,6 +57,12 @@ doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transduc d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424 mkdir -p $d mv -v model.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8 +mkdir -p $d +mv -v model.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -57,6 +75,12 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 mkdir -p $d mv -v model.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8 +mkdir -p $d +mv -v model.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -69,6 +93,12 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k mkdir -p $d mv -v model.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 +mkdir -p $d +mv -v model.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -92,6 +122,16 @@ mkdir -p $d/test_wavs cp en.wav $d/test_wavs/0.wav cp -v $data/en-english.wav $d/test_wavs/1.wav +d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8 +python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/en-english.wav +mkdir -p $d/test_wavs + +cp en.wav $d/test_wavs/0.wav +cp -v $data/en-english.wav $d/test_wavs/1.wav + d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500 python3 ./test-onnx-ctc-non-streaming.py \ --model $d/model.onnx \ @@ -101,6 +141,15 @@ mkdir -p $d/test_wavs cp en.wav $d/test_wavs/0.wav cp -v $data/en-english.wav $d/test_wavs +d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8 +python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/en-english.wav +mkdir -p $d/test_wavs +cp en.wav $d/test_wavs/0.wav +cp -v $data/en-english.wav $d/test_wavs + d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424 python3 ./test-onnx-ctc-non-streaming.py \ --model $d/model.onnx \ @@ -109,6 +158,14 @@ python3 ./test-onnx-ctc-non-streaming.py \ mkdir -p $d/test_wavs cp -v $data/es-spanish.wav $d/test_wavs +d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8 +python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/es-spanish.wav +mkdir -p $d/test_wavs +cp -v $data/es-spanish.wav $d/test_wavs + d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 mkdir -p $d/test_wavs for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do @@ -119,6 +176,16 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do cp -v $data/$w $d/test_wavs done +d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8 +mkdir -p $d/test_wavs +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do + python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/$w + cp -v $data/$w $d/test_wavs +done + d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k mkdir -p $d/test_wavs for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do @@ -128,3 +195,13 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.w --wav $data/$w cp -v $data/$w $d/test_wavs done + +d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 +mkdir -p $d/test_wavs +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do + python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/$w + cp -v $data/$w $d/test_wavs +done diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc.sh b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc.sh index 6a3f0a69..d54d16b4 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc.sh +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc.sh @@ -17,11 +17,22 @@ ms=( for m in ${ms[@]}; do ./export-onnx-ctc.py --model $m d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms + + d_int8=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms-int8 + if [ ! -f $d/model.onnx ]; then - mkdir -p $d + mkdir -p $d $d_int8 mv -v model.onnx $d/ - mv -v tokens.txt $d/ + cp -v tokens.txt $d/ + + mv -v model.int8.onnx $d_int8/ + mv -v tokens.txt $d_int8/ + + echo "---$d---" ls -lh $d + + echo "---$d_int8---" + ls -lh $d_int8 fi done @@ -29,8 +40,16 @@ done for m in ${ms[@]}; do d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms + echo "---$d---" python3 ./test-onnx-ctc.py \ --model $d/model.onnx \ --tokens $d/tokens.txt \ --wav ./0.wav + + d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms-int8 + echo "---$d---" + python3 ./test-onnx-ctc.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav ./0.wav done diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer-non-streaming.sh b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer-non-streaming.sh index 059d97ce..54de08a9 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer-non-streaming.sh +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer-non-streaming.sh @@ -18,7 +18,17 @@ log "Process $name at $url" ./export-onnx-transducer-non-streaming.py --model $name --doc "$doc" d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000 mkdir -p $d -mv -v *.onnx $d/ +mv -v encoder.onnx $d/ +mv -v decoder.onnx $d/ +mv -v joiner.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8 +mkdir -p $d +mv -v encoder.int8.onnx $d/ +mv -v decoder.int8.onnx $d/ +mv -v joiner.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -32,7 +42,17 @@ log "Process $name at $url" d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500 mkdir -p $d -mv -v *.onnx $d/ +mv -v encoder.onnx $d/ +mv -v decoder.onnx $d/ +mv -v joiner.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8 +mkdir -p $d +mv -v encoder.int8.onnx $d/ +mv -v decoder.int8.onnx $d/ +mv -v joiner.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -44,7 +64,17 @@ doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transduc d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424 mkdir -p $d -mv -v *.onnx $d/ +mv -v encoder.onnx $d/ +mv -v decoder.onnx $d/ +mv -v joiner.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8 +mkdir -p $d +mv -v encoder.int8.onnx $d/ +mv -v decoder.int8.onnx $d/ +mv -v joiner.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -56,7 +86,17 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 mkdir -p $d -mv -v *.onnx $d/ +mv -v encoder.onnx $d/ +mv -v decoder.onnx $d/ +mv -v joiner.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8 +mkdir -p $d +mv -v encoder.int8.onnx $d/ +mv -v decoder.int8.onnx $d/ +mv -v joiner.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -68,7 +108,17 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k mkdir -p $d -mv -v *.onnx $d/ +mv -v encoder.onnx $d/ +mv -v decoder.onnx $d/ +mv -v joiner.onnx $d/ +cp -v tokens.txt $d/ +ls -lh $d + +d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 +mkdir -p $d +mv -v encoder.int8.onnx $d/ +mv -v decoder.int8.onnx $d/ +mv -v joiner.int8.onnx $d/ mv -v tokens.txt $d/ ls -lh $d @@ -101,6 +151,25 @@ mkdir -p $d/test_wavs cp en.wav $d/test_wavs/0.wav cp -v $data/en-english.wav $d/test_wavs +d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8 +python3 ./test-onnx-transducer-non-streaming.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/en-english.wav + +python3 ./test-onnx-transducer-non-streaming.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav ./en.wav + +mkdir -p $d/test_wavs +cp en.wav $d/test_wavs/0.wav +cp -v $data/en-english.wav $d/test_wavs + d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500 python3 ./test-onnx-transducer-non-streaming.py \ --encoder $d/encoder.onnx \ @@ -112,6 +181,17 @@ mkdir -p $d/test_wavs cp en.wav $d/test_wavs/0.wav cp -v $data/en-english.wav $d/test_wavs +d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8 +python3 ./test-onnx-transducer-non-streaming.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/en-english.wav +mkdir -p $d/test_wavs +cp en.wav $d/test_wavs/0.wav +cp -v $data/en-english.wav $d/test_wavs + d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424 python3 ./test-onnx-transducer-non-streaming.py \ --encoder $d/encoder.onnx \ @@ -122,6 +202,16 @@ python3 ./test-onnx-transducer-non-streaming.py \ mkdir -p $d/test_wavs cp -v $data/es-spanish.wav $d/test_wavs +d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8 +python3 ./test-onnx-transducer-non-streaming.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/es-spanish.wav +mkdir -p $d/test_wavs +cp -v $data/es-spanish.wav $d/test_wavs + d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 mkdir -p $d/test_wavs for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do @@ -134,6 +224,18 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do cp -v $data/$w $d/test_wavs done +d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8 +mkdir -p $d/test_wavs +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do + python3 ./test-onnx-transducer-non-streaming.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/$w + cp -v $data/$w $d/test_wavs +done + d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k mkdir -p $d/test_wavs for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do @@ -145,3 +247,15 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.w --wav $data/$w cp -v $data/$w $d/test_wavs done + +d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 +mkdir -p $d/test_wavs +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do + python3 ./test-onnx-transducer-non-streaming.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $data/$w + cp -v $data/$w $d/test_wavs +done diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer.sh b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer.sh index a9157fec..ce176caf 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer.sh +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer.sh @@ -17,13 +17,24 @@ ms=( for m in ${ms[@]}; do ./export-onnx-transducer.py --model $m d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms + d_int8=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms-int8 if [ ! -f $d/encoder.onnx ]; then - mkdir -p $d + mkdir -p $d $d_int8 mv -v encoder.onnx $d/ mv -v decoder.onnx $d/ mv -v joiner.onnx $d/ - mv -v tokens.txt $d/ + cp -v tokens.txt $d/ + + mv -v encoder.int8.onnx $d_int8/ + mv -v decoder.int8.onnx $d_int8/ + mv -v joiner.int8.onnx $d_int8/ + mv -v tokens.txt $d_int8/ + + echo "---$d---" ls -lh $d + + echo "---$d_int8---" + ls -lh $d_int8 fi done @@ -37,4 +48,12 @@ for m in ${ms[@]}; do --joiner $d/joiner.onnx \ --tokens $d/tokens.txt \ --wav ./0.wav + + d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms-int8 + python3 ./test-onnx-transducer.py \ + --encoder $d/encoder.int8.onnx \ + --decoder $d/decoder.int8.onnx \ + --joiner $d/joiner.int8.onnx \ + --tokens $d/tokens.txt \ + --wav ./0.wav done diff --git a/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/export-onnx-ctc.py b/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/export-onnx-ctc.py new file mode 100755 index 00000000..1314fa66 --- /dev/null +++ b/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/export-onnx-ctc.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) +import os +from typing import Dict + +import nemo.collections.asr as nemo_asr +import onnx +import torch +from onnxruntime.quantization import QuantType, quantize_dynamic + + +def add_meta_data(filename: str, meta_data: Dict[str, str]): + """Add meta data to an ONNX model. It is changed in-place. + + Args: + filename: + Filename of the ONNX model to be changed. + meta_data: + Key-value pairs. + """ + model = onnx.load(filename) + while len(model.metadata_props): + model.metadata_props.pop() + + for key, value in meta_data.items(): + meta = model.metadata_props.add() + meta.key = key + meta.value = str(value) + + onnx.save(model, filename) + + +@torch.no_grad() +def main(): + asr_model = nemo_asr.models.ASRModel.from_pretrained( + model_name="nvidia/parakeet-tdt_ctc-0.6b-ja" + ) + + print(asr_model.cfg) + print(asr_model) + + with open("./tokens.txt", "w", encoding="utf-8") as f: + for i, s in enumerate(asr_model.joint.vocabulary): + f.write(f"{s} {i}\n") + f.write(f" {i+1}\n") + print("Saved to tokens.txt") + + decoder_type = "ctc" + asr_model.change_decoding_strategy(decoder_type=decoder_type) + asr_model.eval() + + asr_model.set_export_config({"decoder_type": "ctc"}) + + filename = "model.onnx" + + asr_model.export(filename, onnx_opset_version=18) + + normalize_type = asr_model.cfg.preprocessor.normalize + if normalize_type == "NA": + normalize_type = "" + + meta_data = { + "vocab_size": asr_model.decoder.vocab_size, + "normalize_type": normalize_type, + "subsampling_factor": 8, + "model_type": "EncDecHybridRNNTCTCBPEModel", + "version": "1", + "model_author": "NeMo", + "url": "https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja", + "comment": "Only the CTC branch is exported", + "doc": "See https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja", + } + + os.system("ls -lh *.onnx") + + quantize_dynamic( + model_input="./model.onnx", + model_output="./model.int8.onnx", + weight_type=QuantType.QUInt8, + ) + + add_meta_data("model.int8.onnx", meta_data) + + os.system("ls -lh *.onnx") + + print("preprocessor", asr_model.cfg.preprocessor) + print(meta_data) + + +if __name__ == "__main__": + main() diff --git a/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/run-ctc.sh b/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/run-ctc.sh new file mode 100755 index 00000000..b970b00a --- /dev/null +++ b/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/run-ctc.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -ex + +python3 ./export-onnx-ctc.py + +ls -lh *.onnx + +mkdir -p test_wavs +pushd test_wavs +curl -SL -O https://huggingface.co/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/transcripts.txt +curl -SL -O https://hf-mirror.com/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/test_ja_1.wav +curl -SL -O https://hf-mirror.com/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/test_ja_2.wav +popd + +d=sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8 + +mkdir -p $d +mv -v model.int8.onnx $d/ +cp -v tokens.txt $d/ +cp -av test_wavs $d +ls -lh $d + + +d=sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8 +python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $d/test_wavs/test_ja_1.wav + +python3 ./test-onnx-ctc-non-streaming.py \ + --model $d/model.int8.onnx \ + --tokens $d/tokens.txt \ + --wav $d/test_wavs/test_ja_2.wav diff --git a/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/test-onnx-ctc-non-streaming.py b/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/test-onnx-ctc-non-streaming.py new file mode 120000 index 00000000..592162a3 --- /dev/null +++ b/scripts/nemo/parakeet-tdt_ctc-0.6b-ja/test-onnx-ctc-non-streaming.py @@ -0,0 +1 @@ +../fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py \ No newline at end of file diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index 0b6ac25a..96fff544 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -601,6 +601,26 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { tokens = "$modelDir/tokens.txt", ) } + + 33 -> { + val modelDir = "sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 34 -> { + val modelDir = "sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } } return null }