From e3280027f95cff53b21588d721404009695ffda2 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 25 Apr 2025 11:18:57 +0800 Subject: [PATCH] Support decoding multiple streams in Java API. (#2149) --- .github/workflows/run-java-test.yaml | 2 + ...NonStreamingDecodeFileWhisperMultiple.java | 59 +++++++++++++++++++ ...-streaming-decode-file-whisper-multiple.sh | 38 ++++++++++++ .../k2fsa/sherpa/onnx/OfflineRecognizer.java | 10 ++++ .../k2fsa/sherpa/onnx/OnlineRecognizer.java | 10 ++++ sherpa-onnx/jni/offline-recognizer.cc | 28 ++++++++- sherpa-onnx/jni/online-recognizer.cc | 16 +++++ 7 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 java-api-examples/NonStreamingDecodeFileWhisperMultiple.java create mode 100755 java-api-examples/run-non-streaming-decode-file-whisper-multiple.sh diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index d0345709..b1ea186f 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -166,6 +166,8 @@ jobs: rm -rf sherpa-onnx-fire-red-* ./run-non-streaming-decode-file-whisper.sh + + ./run-non-streaming-decode-file-whisper-multiple.sh rm -rf sherpa-onnx-whisper-* ./run-non-streaming-decode-file-nemo.sh diff --git a/java-api-examples/NonStreamingDecodeFileWhisperMultiple.java b/java-api-examples/NonStreamingDecodeFileWhisperMultiple.java new file mode 100644 index 00000000..971d0f11 --- /dev/null +++ b/java-api-examples/NonStreamingDecodeFileWhisperMultiple.java @@ -0,0 +1,59 @@ +// Copyright 2025 Xiaomi Corporation + +// This file shows how to use an offline whisper, i.e., non-streaming whisper, +// to decode files. +import com.k2fsa.sherpa.onnx.*; + +public class NonStreamingDecodeFileWhisperMultiple { + public static void main(String[] args) { + // please refer to + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html + // to download model files + String encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"; + String decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"; + String tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"; + + String waveFilename0 = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"; + String waveFilename1 = "./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav"; + + WaveReader reader0 = new WaveReader(waveFilename0); + WaveReader reader1 = new WaveReader(waveFilename1); + + OfflineWhisperModelConfig whisper = + OfflineWhisperModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build(); + + OfflineModelConfig modelConfig = + OfflineModelConfig.builder() + .setWhisper(whisper) + .setTokens(tokens) + .setNumThreads(1) + .setDebug(true) + .build(); + + OfflineRecognizerConfig config = + OfflineRecognizerConfig.builder() + .setOfflineModelConfig(modelConfig) + .setDecodingMethod("greedy_search") + .build(); + + OfflineRecognizer recognizer = new OfflineRecognizer(config); + OfflineStream stream0 = recognizer.createStream(); + stream0.acceptWaveform(reader0.getSamples(), reader0.getSampleRate()); + + OfflineStream stream1 = recognizer.createStream(); + stream1.acceptWaveform(reader1.getSamples(), reader1.getSampleRate()); + + OfflineStream[] ss = new OfflineStream[] {stream0, stream1}; + recognizer.decode(ss); + + String text0 = recognizer.getResult(stream0).getText(); + String text1 = recognizer.getResult(stream1).getText(); + + System.out.printf("filename0:%s\nresult0:%s\n\n", waveFilename0, text0); + System.out.printf("filename1:%s\nresult1:%s\n\n", waveFilename1, text1); + + stream0.release(); + stream1.release(); + recognizer.release(); + } +} diff --git a/java-api-examples/run-non-streaming-decode-file-whisper-multiple.sh b/java-api-examples/run-non-streaming-decode-file-whisper-multiple.sh new file mode 100755 index 00000000..883cbdc7 --- /dev/null +++ b/java-api-examples/run-non-streaming-decode-file-whisper-multiple.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 + + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 + rm sherpa-onnx-whisper-tiny.en.tar.bz2 +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + NonStreamingDecodeFileWhisperMultiple.java diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java index 1133ed32..a89870ce 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java @@ -17,6 +17,14 @@ public class OfflineRecognizer { decode(ptr, s.getPtr()); } + public void decode(OfflineStream[] ss) { + long[] streamPtrs = new long[ss.length]; + for (int i = 0; i < ss.length; ++i) { + streamPtrs[i] = ss[i].getPtr(); + } + decodeStreams(ptr, streamPtrs); + } + public OfflineStream createStream() { long p = createStream(ptr); return new OfflineStream(p); @@ -55,5 +63,7 @@ public class OfflineRecognizer { private native void decode(long ptr, long streamPtr); + private native void decodeStreams(long ptr, long[] streamPtrs); + private native Object[] getResult(long streamPtr); } diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizer.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizer.java index c98d15bb..206d46ba 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizer.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizer.java @@ -18,6 +18,14 @@ public class OnlineRecognizer { decode(ptr, s.getPtr()); } + public void decode(OnlineStream[] ss) { + long[] streamPtrs = new long[ss.length]; + for (int i = 0; i < ss.length; ++i) { + streamPtrs[i] = ss[i].getPtr(); + } + decodeStreams(ptr, streamPtrs); + } + public boolean isReady(OnlineStream s) { return isReady(ptr, s.getPtr()); } @@ -68,6 +76,8 @@ public class OnlineRecognizer { private native void decode(long ptr, long streamPtr); + private native void decodeStreams(long ptr, long[] streamPtrs); + private native boolean isEndpoint(long ptr, long streamPtr); private native boolean isReady(long ptr, long streamPtr); diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc index b63d6351..15650366 100644 --- a/sherpa-onnx/jni/offline-recognizer.cc +++ b/sherpa-onnx/jni/offline-recognizer.cc @@ -366,21 +366,43 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_createStream(JNIEnv * /*env*/, SHERPA_ONNX_EXTERN_C JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_decode( - JNIEnv *env, jobject /*obj*/, jlong ptr, jlong streamPtr) { + JNIEnv *env, jobject /*obj*/, jlong ptr, jlong stream_ptr) { SafeJNI(env, "OfflineRecognizer_decode", [&] { if (!ValidatePointer(env, ptr, "OfflineRecognizer_decode", "OfflineRecognizer pointer is null.") || - !ValidatePointer(env, streamPtr, "OfflineRecognizer_decode", + !ValidatePointer(env, stream_ptr, "OfflineRecognizer_decode", "OfflineStream pointer is null.")) { return; } auto recognizer = reinterpret_cast(ptr); - auto stream = reinterpret_cast(streamPtr); + auto stream = reinterpret_cast(stream_ptr); recognizer->DecodeStream(stream); }); } +SHERPA_ONNX_EXTERN_C +JNIEXPORT void JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_decodeStreams( + JNIEnv *env, jobject /*obj*/, jlong ptr, jlongArray stream_ptrs) { + SafeJNI(env, "OfflineRecognizer_decode_streams", [&] { + if (!ValidatePointer(env, ptr, "OfflineRecognizer_decode_streams", + "OfflineRecognizer pointer is null.")) { + return; + } + + auto recognizer = reinterpret_cast(ptr); + + jlong *p = env->GetLongArrayElements(stream_ptrs, nullptr); + jsize n = env->GetArrayLength(stream_ptrs); + + auto ss = reinterpret_cast(p); + recognizer->DecodeStreams(ss, n); + + env->ReleaseLongArrayElements(stream_ptrs, p, JNI_ABORT); + }); +} + SHERPA_ONNX_EXTERN_C JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_getResult(JNIEnv *env, diff --git a/sherpa-onnx/jni/online-recognizer.cc b/sherpa-onnx/jni/online-recognizer.cc index dbe205c4..ca0684a6 100644 --- a/sherpa-onnx/jni/online-recognizer.cc +++ b/sherpa-onnx/jni/online-recognizer.cc @@ -339,6 +339,22 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_decode( recognizer->DecodeStream(stream); } +SHERPA_ONNX_EXTERN_C +JNIEXPORT void JNICALL +Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_decodeStreams( + JNIEnv *env, jobject /*obj*/, jlong ptr, jlongArray stream_ptrs) { + auto recognizer = reinterpret_cast(ptr); + + jlong *p = env->GetLongArrayElements(stream_ptrs, nullptr); + jsize n = env->GetArrayLength(stream_ptrs); + + auto ss = reinterpret_cast(p); + + recognizer->DecodeStreams(ss, n); + + env->ReleaseLongArrayElements(stream_ptrs, p, JNI_ABORT); +} + SHERPA_ONNX_EXTERN_C JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_createStream(JNIEnv *env,