Add Kotlin and Java API for Dolphin CTC models (#2086)

This commit is contained in:
Fangjun Kuang
2025-04-02 21:16:14 +08:00
committed by GitHub
parent 0de7e1b9f0
commit eee5575836
20 changed files with 517 additions and 18 deletions

View File

@@ -0,0 +1,49 @@
// Copyright 2025 Xiaomi Corporation
// This file shows how to use an offline Dolphin CTC model, i.e.,
// non-streaming Dolphin CTC model, to decode files.
import com.k2fsa.sherpa.onnx.*;
/**
 * Decodes one wave file with a non-streaming (offline) Dolphin CTC model and prints the
 * recognized text to standard output.
 *
 * <p>The model, tokens and wave file paths are hard-coded relative to the current working
 * directory; see the download link inside {@code main}.
 */
public class NonStreamingDecodeFileDolphinCtc {
  /**
   * Entry point: loads the wave file, builds the recognizer, decodes, and prints the result.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // please refer to
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    // to download model files
    String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
    String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
    String waveFilename =
        "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setDolphin(dolphin)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    try {
      OfflineStream stream = recognizer.createStream();
      try {
        stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
        recognizer.decode(stream);

        String text = recognizer.getResult(stream).getText();
        System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
      } finally {
        // Release the stream even when decoding throws.
        stream.release();
      }
    } finally {
      // The recognizer is released after the stream, matching the original order.
      recognizer.release();
    }
  }
}

View File

@@ -23,6 +23,7 @@ This directory contains examples for the JAVA API of sherpa-onnx.
## Non-Streaming Speech recognition
```bash
./run-non-streaming-decode-file-dolphin-ctc.sh
./run-non-streaming-decode-file-paraformer.sh
./run-non-streaming-decode-file-sense-voice.sh
./run-non-streaming-decode-file-transducer.sh
@@ -102,6 +103,12 @@ The punctuation model supports both English and Chinese.
./run-vad-remove-slience.sh
```
## VAD + Non-streaming Dolphin CTC for speech recognition
```bash
./run-vad-non-streaming-dolphin-ctc.sh
```
## VAD + Non-streaming SenseVoice for speech recognition
```bash

View File

@@ -0,0 +1,123 @@
// Copyright 2025 Xiaomi Corporation
// This file shows how to use a silero_vad model with a non-streaming Dolphin
// CTC model for speech recognition.
import com.k2fsa.sherpa.onnx.*;
import java.util.Arrays;
/**
 * Splits a wave file into speech segments with a silero_vad model and decodes every segment
 * with a non-streaming Dolphin CTC model, printing one line per non-empty result.
 *
 * <p>NOTE(review): the class name looks copied from the SenseVoice example although this
 * program uses a Dolphin CTC model; it is kept unchanged here to avoid renaming a public
 * class. Consider renaming it together with its file if nothing depends on the name.
 */
public class VadNonStreamingSenseVoice {
  /** Sample rate, in Hz, used for the VAD config, the decoded segments and the timestamps. */
  private static final int SAMPLE_RATE = 16000;

  /** Number of samples passed to the VAD per call; also used as the VAD window size. */
  private static final int WINDOW_SIZE = 512;

  /**
   * Builds a {@code Vad} backed by {@code ./silero_vad.onnx}.
   *
   * @return a new voice activity detector; the caller is responsible for calling release()
   */
  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";

    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(WINDOW_SIZE)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(SAMPLE_RATE)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  /**
   * Builds an {@code OfflineRecognizer} for the int8 Dolphin CTC model using greedy search.
   *
   * @return a new recognizer; the caller is responsible for calling release()
   */
  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html
    // to download model files
    String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
    String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";

    OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setDolphin(dolphin)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  /**
   * Decodes and pops every segment currently queued in {@code vad}, printing
   * {@code start--end: text} (times in seconds) for each segment with non-empty text.
   */
  private static void decodeQueuedSegments(Vad vad, OfflineRecognizer recognizer) {
    while (!vad.empty()) {
      SpeechSegment segment = vad.front();
      float[] segmentSamples = segment.getSamples();

      float startTime = segment.getStart() / (float) SAMPLE_RATE;
      float duration = segmentSamples.length / (float) SAMPLE_RATE;

      String text;
      OfflineStream stream = recognizer.createStream();
      try {
        stream.acceptWaveform(segmentSamples, SAMPLE_RATE);
        recognizer.decode(stream);
        text = recognizer.getResult(stream).getText();
      } finally {
        // Release the per-segment stream even when decoding throws.
        stream.release();
      }

      if (!text.isEmpty()) {
        System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
      }

      vad.pop();
    }
  }

  /**
   * Entry point: feeds {@code ./lei-jun-test.wav} to the VAD window by window and decodes the
   * detected speech segments.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    Vad vad = createVad();
    try {
      OfflineRecognizer recognizer = createOfflineRecognizer();
      try {
        // You can download the test file from
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
        String testWaveFilename = "./lei-jun-test.wav";
        WaveReader reader = new WaveReader(testWaveFilename);

        float[] allSamples = reader.getSamples();
        // Integer division: a trailing partial window is not fed to the VAD.
        int numWindows = allSamples.length / WINDOW_SIZE;

        for (int i = 0; i != numWindows; ++i) {
          int start = i * WINDOW_SIZE;
          float[] window = Arrays.copyOfRange(allSamples, start, start + WINDOW_SIZE);

          vad.acceptWaveform(window);
          if (vad.isSpeechDetected()) {
            decodeQueuedSegments(vad, recognizer);
          }
        }

        // Flush the VAD, then decode whatever segments are still queued.
        vad.flush();
        decodeQueuedSegments(vad, recognizer);
      } finally {
        recognizer.release();
      }
    } finally {
      vad.release();
    }
  }
}

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Builds the JNI library and the Java API jar when they are missing, downloads
# the Dolphin CTC model when it is missing, and then runs
# NonStreamingDecodeFileDolphinCtc.java.
#
# All paths are relative, so run this script from the directory that contains it.

# -e: stop at the first failing command; -x: echo each command before running it.
set -ex

# Build the JNI shared library if neither the .dylib nor the .so is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model on first use; the archive is removed afterwards.
if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

# $PWD is quoted so that a checkout path containing spaces stays a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileDolphinCtc.java

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Builds the JNI library and the Java API jar when they are missing, downloads
# the silero_vad model, the test wave and the Dolphin CTC model when they are
# missing, and then runs VadNonStreamingDolphinCtc.java.
#
# All paths are relative, so run this script from the directory that contains it.

# -e: stop at the first failing command; -x: echo each command before running it.
set -ex

# Build the JNI shared library if neither the .dylib nor the .so is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the VAD model on first use.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download the test wave file on first use.
if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Download and unpack the Dolphin CTC model on first use; the archive is removed afterwards.
if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

# $PWD is quoted so that a checkout path containing spaces stays a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingDolphinCtc.java