diff --git a/.github/workflows/apk-asr-2pass.yaml b/.github/workflows/apk-asr-2pass.yaml index 72885db4..7cb4253f 100644 --- a/.github/workflows/apk-asr-2pass.yaml +++ b/.github/workflows/apk-asr-2pass.yaml @@ -23,8 +23,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - total: ["4"] - index: ["0", "1", "2", "3"] + total: ["16"] + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/apk-vad-asr.yaml b/.github/workflows/apk-vad-asr.yaml index fe706aa1..bf103111 100644 --- a/.github/workflows/apk-vad-asr.yaml +++ b/.github/workflows/apk-vad-asr.yaml @@ -23,8 +23,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - total: ["10"] - index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] + total: ["18"] + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 4b8dda24..d0345709 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -105,6 +105,16 @@ jobs: make -j4 ls -lh lib + - name: Run java test (VAD + Non-streaming Dolphin CTC) + shell: bash + run: | + cd ./java-api-examples + ./run-vad-non-streaming-dolphin-ctc.sh + rm *.onnx + ls -lh *.wav + rm *.wav + rm -rf sherpa-onnx-dolphin-* + - name: Run speech enhancement (GTCRN) shell: bash run: | @@ -135,6 +145,9 @@ jobs: run: | cd ./java-api-examples + ./run-non-streaming-decode-file-dolphin-ctc.sh + rm -rf sherpa-onnx-dolphin-* + ./run-non-streaming-decode-file-moonshine.sh rm -rf sherpa-onnx-moonshine-* diff --git a/.gitignore b/.gitignore index 2fbc184f..e642ec53 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ README-DEV.txt *.jit ##clion .idea +sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 diff --git a/java-api-examples/NonStreamingDecodeFileDolphinCtc.java 
b/java-api-examples/NonStreamingDecodeFileDolphinCtc.java new file mode 100644 index 00000000..dea8ae64 --- /dev/null +++ b/java-api-examples/NonStreamingDecodeFileDolphinCtc.java @@ -0,0 +1,49 @@ +// Copyright 2025 Xiaomi Corporation + +// This file shows how to use an offline Dolphin CTC model, i.e., +// non-streaming Dolphin CTC model, to decode files. +import com.k2fsa.sherpa.onnx.*; + +public class NonStreamingDecodeFileDolphinCtc { + public static void main(String[] args) { + // please refer to + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + // to download model files + String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; + String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; + + String waveFilename = + "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; + + WaveReader reader = new WaveReader(waveFilename); + + OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build(); + + OfflineModelConfig modelConfig = + OfflineModelConfig.builder() + .setDolphin(dolphin) + .setTokens(tokens) + .setNumThreads(1) + .setDebug(true) + .build(); + + OfflineRecognizerConfig config = + OfflineRecognizerConfig.builder() + .setOfflineModelConfig(modelConfig) + .setDecodingMethod("greedy_search") + .build(); + + OfflineRecognizer recognizer = new OfflineRecognizer(config); + OfflineStream stream = recognizer.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + recognizer.decode(stream); + + String text = recognizer.getResult(stream).getText(); + + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); + + stream.release(); + recognizer.release(); + } +} diff --git a/java-api-examples/README.md b/java-api-examples/README.md index 779c1b25..d0ea4a74 100755 --- a/java-api-examples/README.md +++ b/java-api-examples/README.md @@ -23,6 +23,7 @@ This directory contains examples for the 
JAVA API of sherpa-onnx. ## Non-Streaming Speech recognition ```bash +./run-non-streaming-decode-file-dolphin-ctc.sh ./run-non-streaming-decode-file-paraformer.sh ./run-non-streaming-decode-file-sense-voice.sh ./run-non-streaming-decode-file-transducer.sh @@ -102,6 +103,12 @@ The punctuation model supports both English and Chinese. ./run-vad-remove-slience.sh ``` +## VAD + Non-streaming Dolphin CTC for speech recognition + +```bash +./run-vad-non-streaming-dolphin-ctc.sh + +## VAD + Non-streaming SenseVoice for speech recognition ```bash diff --git a/java-api-examples/VadNonStreamingDolphinCtc.java b/java-api-examples/VadNonStreamingDolphinCtc.java new file mode 100644 index 00000000..2183ee60 --- /dev/null +++ b/java-api-examples/VadNonStreamingDolphinCtc.java @@ -0,0 +1,123 @@ +// Copyright 2025 Xiaomi Corporation + +// This file shows how to use a silero_vad model with a non-streaming Dolphin +// CTC model for speech recognition. + +import com.k2fsa.sherpa.onnx.*; +import java.util.Arrays; + +public class VadNonStreamingDolphinCtc { + public static Vad createVad() { + // please download ./silero_vad.onnx from + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + String model = "./silero_vad.onnx"; + SileroVadModelConfig sileroVad = + SileroVadModelConfig.builder() + .setModel(model) + .setThreshold(0.5f) + .setMinSilenceDuration(0.25f) + .setMinSpeechDuration(0.5f) + .setWindowSize(512) + .setMaxSpeechDuration(5.0f) + .build(); + + VadModelConfig config = + VadModelConfig.builder() + .setSileroVadModelConfig(sileroVad) + .setSampleRate(16000) + .setNumThreads(1) + .setDebug(true) + .setProvider("cpu") + .build(); + + return new Vad(config); + } + + public static OfflineRecognizer createOfflineRecognizer() { + // please refer to + // https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html + // to download model files + String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; + String tokens =
"./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; + + OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build(); + + OfflineModelConfig modelConfig = + OfflineModelConfig.builder() + .setDolphin(dolphin) + .setTokens(tokens) + .setNumThreads(1) + .setDebug(true) + .build(); + + OfflineRecognizerConfig config = + OfflineRecognizerConfig.builder() + .setOfflineModelConfig(modelConfig) + .setDecodingMethod("greedy_search") + .build(); + + return new OfflineRecognizer(config); + } + + public static void main(String[] args) { + + Vad vad = createVad(); + OfflineRecognizer recognizer = createOfflineRecognizer(); + + // You can download the test file from + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + String testWaveFilename = "./lei-jun-test.wav"; + WaveReader reader = new WaveReader(testWaveFilename); + + int numSamples = reader.getSamples().length; + int numIter = numSamples / 512; + + for (int i = 0; i != numIter; ++i) { + int start = i * 512; + int end = start + 512; + float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end); + vad.acceptWaveform(samples); + if (vad.isSpeechDetected()) { + while (!vad.empty()) { + SpeechSegment segment = vad.front(); + float startTime = segment.getStart() / 16000.0f; + float duration = segment.getSamples().length / 16000.0f; + + OfflineStream stream = recognizer.createStream(); + stream.acceptWaveform(segment.getSamples(), 16000); + recognizer.decode(stream); + String text = recognizer.getResult(stream).getText(); + stream.release(); + + if (!text.isEmpty()) { + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); + } + + vad.pop(); + } + } + } + + vad.flush(); + while (!vad.empty()) { + SpeechSegment segment = vad.front(); + float startTime = segment.getStart() / 16000.0f; + float duration = segment.getSamples().length / 16000.0f; + + OfflineStream stream = recognizer.createStream(); + 
stream.acceptWaveform(segment.getSamples(), 16000); + recognizer.decode(stream); + String text = recognizer.getResult(stream).getText(); + stream.release(); + + if (!text.isEmpty()) { + System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text); + } + + vad.pop(); + } + + vad.release(); + recognizer.release(); + } +} diff --git a/java-api-examples/run-non-streaming-decode-file-dolphin-ctc.sh b/java-api-examples/run-non-streaming-decode-file-dolphin-ctc.sh new file mode 100755 index 00000000..a61c1044 --- /dev/null +++ b/java-api-examples/run-non-streaming-decode-file-dolphin-ctc.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! 
-f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + NonStreamingDecodeFileDolphinCtc.java diff --git a/java-api-examples/run-vad-non-streaming-dolphin-ctc.sh b/java-api-examples/run-vad-non-streaming-dolphin-ctc.sh new file mode 100755 index 00000000..d6f5a99d --- /dev/null +++ b/java-api-examples/run-vad-non-streaming-dolphin-ctc.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./silero_vad.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +fi + +if [ ! -f ./lei-jun-test.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav +fi + +if [ ! 
-f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + ./VadNonStreamingDolphinCtc.java diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index 7f197714..173e51ea 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() { } function testOfflineAsr() { + if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 + fi + if [ ! 
-f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 diff --git a/kotlin-api-examples/test_offline_asr.kt b/kotlin-api-examples/test_offline_asr.kt index 4baa8e91..a6e29148 100644 --- a/kotlin-api-examples/test_offline_asr.kt +++ b/kotlin-api-examples/test_offline_asr.kt @@ -1,7 +1,7 @@ package com.k2fsa.sherpa.onnx fun main() { - val types = arrayOf(0, 2, 5, 6, 15, 21, 24) + val types = arrayOf(0, 2, 5, 6, 15, 21, 24, 25) for (type in types) { test(type) } @@ -18,6 +18,7 @@ fun test(type: Int) { 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" 24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav" + 25 -> "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav" else -> null } diff --git a/scripts/apk/generate-asr-2pass-apk-script.py b/scripts/apk/generate-asr-2pass-apk-script.py index 87cb0a0b..138da81e 100755 --- a/scripts/apk/generate-asr-2pass-apk-script.py +++ b/scripts/apk/generate-asr-2pass-apk-script.py @@ -157,6 +157,21 @@ def get_2nd_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", + idx=25, + lang="multi_lang", + short_name="dolphin_base_ctc", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + + ls -lh + popd """, ), @@ -301,6 +316,48 @@ def get_1st_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01", + idx=15, + lang="zh", + short_name="int8_small_zipformer", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! 
-f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -f bpe.model + + rm -rf test_wavs + rm README.md + + ls -lh + + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01", + idx=16, + lang="zh", + short_name="small_zipformer", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -f bpe.model + + rm -rf test_wavs + rm README.md + + ls -lh + popd """, ), @@ -313,19 +370,25 @@ def get_models(): first = get_1st_models() second = get_2nd_models() - combinations = [ - ( - "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", - "sherpa-onnx-paraformer-zh-2023-09-14", - ), - ( - "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", - "icefall-asr-zipformer-wenetspeech-20230615", - ), - ( - "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", - "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", - ), + combinations = [] + + first_zh = [ + "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23", + "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01", + "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01", + ] + + second_zh = [ + "sherpa-onnx-paraformer-zh-2023-09-14", + "icefall-asr-zipformer-wenetspeech-20230615", + "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", + "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", + ] + for first_m in first_zh: + for second_m in second_zh: + combinations.append((first_m, second_m)) + + combinations += [ ( "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", "sherpa-onnx-whisper-tiny.en", diff --git a/scripts/apk/generate-asr-apk-script.py b/scripts/apk/generate-asr-apk-script.py index bda643c0..e8db919a 100755 --- a/scripts/apk/generate-asr-apk-script.py +++ b/scripts/apk/generate-asr-apk-script.py @@ -260,6 +260,48 
@@ def get_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01", + idx=15, + lang="zh", + short_name="int8_small_zipformer", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -f bpe.model + + rm -rf test_wavs + rm README.md + + ls -lh + + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01", + idx=16, + lang="zh", + short_name="small_zipformer", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -f bpe.model + + rm -rf test_wavs + rm README.md + + ls -lh + popd """, ), diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index 27c773a3..e8e0a9d9 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -440,6 +440,22 @@ def get_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", + idx=25, + lang="multi_lang", + lang2="multi_lang", + short_name="multi_lang", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + + ls -lh + popd """, ), diff --git a/sherpa-onnx/java-api/Makefile b/sherpa-onnx/java-api/Makefile index 249e3eb8..c55d8e0a 100644 --- a/sherpa-onnx/java-api/Makefile +++ b/sherpa-onnx/java-api/Makefile @@ -30,6 +30,7 @@ java_files += OfflineFireRedAsrModelConfig.java java_files += OfflineMoonshineModelConfig.java java_files += OfflineNemoEncDecCtcModelConfig.java java_files += OfflineSenseVoiceModelConfig.java +java_files += OfflineDolphinModelConfig.java java_files += OfflineModelConfig.java java_files += OfflineRecognizerConfig.java java_files += 
OfflineRecognizerResult.java diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineDolphinModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineDolphinModelConfig.java new file mode 100644 index 00000000..68e55b20 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineDolphinModelConfig.java @@ -0,0 +1,32 @@ +// Copyright 2025 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class OfflineDolphinModelConfig { + private final String model; + + private OfflineDolphinModelConfig(Builder builder) { + this.model = builder.model; + } + + public static Builder builder() { + return new Builder(); + } + + public String getModel() { + return model; + } + + public static class Builder { + private String model = ""; + + public OfflineDolphinModelConfig build() { + return new OfflineDolphinModelConfig(this); + } + + public Builder setModel(String model) { + this.model = model; + return this; + } + } +} \ No newline at end of file diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java index 68c81744..34f105b6 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java @@ -10,6 +10,7 @@ public class OfflineModelConfig { private final OfflineMoonshineModelConfig moonshine; private final OfflineNemoEncDecCtcModelConfig nemo; private final OfflineSenseVoiceModelConfig senseVoice; + private final OfflineDolphinModelConfig dolphin; private final String teleSpeech; private final String tokens; private final int numThreads; @@ -28,6 +29,7 @@ public class OfflineModelConfig { this.moonshine = builder.moonshine; this.nemo = builder.nemo; this.senseVoice = builder.senseVoice; + this.dolphin = builder.dolphin; this.teleSpeech = builder.teleSpeech; this.tokens = builder.tokens; this.numThreads = builder.numThreads; @@ -62,6 
+64,10 @@ public class OfflineModelConfig { return senseVoice; } + public OfflineDolphinModelConfig getDolphin() { + return dolphin; + } + public String getTokens() { return tokens; } @@ -102,6 +108,7 @@ public class OfflineModelConfig { private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); + private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); private String teleSpeech = ""; private String tokens = ""; private int numThreads = 1; @@ -120,6 +127,11 @@ public class OfflineModelConfig { return this; } + public Builder setDolphin(OfflineDolphinModelConfig dolphin) { + this.dolphin = dolphin; + return this; + } + public Builder setParaformer(OfflineParaformerModelConfig paraformer) { this.paraformer = paraformer; return this; diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc index 1515bdb7..2eebe084 100644 --- a/sherpa-onnx/jni/offline-recognizer.cc +++ b/sherpa-onnx/jni/offline-recognizer.cc @@ -265,6 +265,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { ans.model_config.nemo_ctc.model = p; env->ReleaseStringUTFChars(s, p); + // dolphin + fid = env->GetFieldID(model_config_cls, "dolphin", + "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;"); + jobject dolphin_config = env->GetObjectField(model_config, fid); + jclass dolphin_config_cls = env->GetObjectClass(dolphin_config); + + fid = env->GetFieldID(dolphin_config_cls, "model", "Ljava/lang/String;"); + + s = (jstring)env->GetObjectField(dolphin_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model_config.dolphin.model = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(model_config_cls, "teleSpeech", "Ljava/lang/String;"); s =
(jstring)env->GetObjectField(model_config, fid); p = env->GetStringUTFChars(s, nullptr); diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index 4c584710..4786ae8e 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -25,6 +25,10 @@ data class OfflineNemoEncDecCtcModelConfig( var model: String = "", ) +data class OfflineDolphinModelConfig( + var model: String = "", +) + data class OfflineWhisperModelConfig( var encoder: String = "", var decoder: String = "", @@ -59,6 +63,7 @@ data class OfflineModelConfig( var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), + var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), var teleSpeech: String = "", var numThreads: Int = 1, var debug: Boolean = false, @@ -481,6 +486,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { tokens = "$modelDir/tokens.txt", ) } + + 25 -> { + val modelDir = "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02" + return OfflineModelConfig( + dolphin = OfflineDolphinModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } } return null } diff --git a/sherpa-onnx/kotlin-api/OnlineRecognizer.kt b/sherpa-onnx/kotlin-api/OnlineRecognizer.kt index 15476f99..3aeecdca 100644 --- a/sherpa-onnx/kotlin-api/OnlineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OnlineRecognizer.kt @@ -374,6 +374,26 @@ fun getModelConfig(type: Int): OnlineModelConfig? 
{ modelType = "zipformer", ) } + + 15 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01" + return OnlineModelConfig( + zipformer2Ctc = OnlineZipformer2CtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 16 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01" + return OnlineModelConfig( + zipformer2Ctc = OnlineZipformer2CtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } } return null }