diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index ed5901e7..e3cf6f49 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -124,6 +124,9 @@ jobs: ./run-non-streaming-decode-file-transducer.sh rm -rf sherpa-onnx-zipformer-* + ./run-non-streaming-decode-file-fire-red-asr.sh + rm -rf sherpa-onnx-fire-red-* + ./run-non-streaming-decode-file-whisper.sh rm -rf sherpa-onnx-whisper-* diff --git a/java-api-examples/NonStreamingDecodeFileFireRedAsr.java b/java-api-examples/NonStreamingDecodeFileFireRedAsr.java new file mode 100644 index 00000000..694b1e28 --- /dev/null +++ b/java-api-examples/NonStreamingDecodeFileFireRedAsr.java @@ -0,0 +1,50 @@ +// Copyright 2025 Xiaomi Corporation + +// This file shows how to use an offline FireRedAsr AED model +// to decode files. +import com.k2fsa.sherpa.onnx.*; + +public class NonStreamingDecodeFileFireRedAsr { + public static void main(String[] args) { + // please refer to + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/FireRedAsr/index.html + // to download model files + String encoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx"; + String decoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx"; + String tokens = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt"; + + String waveFilename = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"; + + WaveReader reader = new WaveReader(waveFilename); + + OfflineFireRedAsrModelConfig fireRedAsr = + OfflineFireRedAsrModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build(); + + OfflineModelConfig modelConfig = + OfflineModelConfig.builder() + .setFireRedAsr(fireRedAsr) + .setTokens(tokens) + .setNumThreads(2) + .setDebug(true) + .build(); + + OfflineRecognizerConfig config = + OfflineRecognizerConfig.builder() + .setOfflineModelConfig(modelConfig) + .setDecodingMethod("greedy_search") + .build(); + + OfflineRecognizer recognizer = new OfflineRecognizer(config); + OfflineStream stream = recognizer.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + recognizer.decode(stream); + + String text = recognizer.getResult(stream).getText(); + + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); + + stream.release(); + recognizer.release(); + } +} diff --git a/java-api-examples/run-non-streaming-decode-file-fire-red-asr.sh b/java-api-examples/run-non-streaming-decode-file-fire-red-asr.sh new file mode 100755 index 00000000..5ea5a7cc --- /dev/null +++ b/java-api-examples/run-non-streaming-decode-file-fire-red-asr.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + NonStreamingDecodeFileFireRedAsr.java diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index c815b6ad..6d1c2e40 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() { } function testOfflineAsr() { + if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 + fi + if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 diff --git a/kotlin-api-examples/test_offline_asr.kt b/kotlin-api-examples/test_offline_asr.kt index 6c2c10a6..4baa8e91 100644 --- a/kotlin-api-examples/test_offline_asr.kt +++ b/kotlin-api-examples/test_offline_asr.kt @@ -1,7 +1,7 @@ package com.k2fsa.sherpa.onnx fun main() { - val types = arrayOf(0, 2, 5, 6, 15, 21) + val types = arrayOf(0, 2, 5, 6, 15, 21, 24) for (type in types) { test(type) } @@ -17,6 +17,7 @@ fun test(type: Int) { 6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav" 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" + 24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav" else -> null } diff --git a/sherpa-onnx/java-api/Makefile b/sherpa-onnx/java-api/Makefile index 0721daf1..1990a37e 100644 --- a/sherpa-onnx/java-api/Makefile +++ b/sherpa-onnx/java-api/Makefile @@ -26,6 +26,7 @@ java_files += OnlineRecognizer.java java_files += OfflineTransducerModelConfig.java java_files += OfflineParaformerModelConfig.java java_files += OfflineWhisperModelConfig.java +java_files += OfflineFireRedAsrModelConfig.java java_files += OfflineMoonshineModelConfig.java java_files += OfflineNemoEncDecCtcModelConfig.java java_files += OfflineSenseVoiceModelConfig.java diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig.java new file mode 100644 index 00000000..4d48296d --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig.java @@ -0,0 +1,42 @@ +package com.k2fsa.sherpa.onnx; + +public class OfflineFireRedAsrModelConfig { + private final String encoder; + private final String decoder; + + private OfflineFireRedAsrModelConfig(Builder builder) { + this.encoder = builder.encoder; + this.decoder = builder.decoder; + } + + public static Builder builder() { + return new Builder(); + } + + public String getEncoder() { + return encoder; + } + + public String getDecoder() { + return decoder; + } + + public static class Builder { + private String encoder = ""; + private String decoder = ""; + + public OfflineFireRedAsrModelConfig build() { + return new OfflineFireRedAsrModelConfig(this); + } + + public Builder setEncoder(String encoder) { + this.encoder = encoder; + return this; + } + + public Builder setDecoder(String decoder) { + this.decoder = decoder; + return this; + } + } +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java index 4d0192b6..68c81744 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java @@ -6,6 +6,7 @@ public class OfflineModelConfig { private final OfflineTransducerModelConfig transducer; private final OfflineParaformerModelConfig paraformer; private final OfflineWhisperModelConfig whisper; + private final OfflineFireRedAsrModelConfig fireRedAsr; private final OfflineMoonshineModelConfig moonshine; private final OfflineNemoEncDecCtcModelConfig nemo; private final OfflineSenseVoiceModelConfig senseVoice; @@ -23,6 +24,7 @@ public class OfflineModelConfig { this.transducer = builder.transducer; this.paraformer = builder.paraformer; this.whisper = builder.whisper; + this.fireRedAsr = builder.fireRedAsr; this.moonshine = builder.moonshine; this.nemo = builder.nemo; this.senseVoice = builder.senseVoice; @@ -96,6 +98,7 @@ public class OfflineModelConfig { private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build(); private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build(); private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build(); + private OfflineFireRedAsrModelConfig fireRedAsr = OfflineFireRedAsrModelConfig.builder().build(); private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); @@ -137,6 +140,11 @@ public class OfflineModelConfig { return this; } + public Builder setFireRedAsr(OfflineFireRedAsrModelConfig fireRedAsr) { + this.fireRedAsr = fireRedAsr; + return this; + } + public Builder setSenseVoice(OfflineSenseVoiceModelConfig senseVoice) { this.senseVoice = senseVoice; return this; diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc index 7df79f34..e274add2 100644 --- a/sherpa-onnx/jni/offline-recognizer.cc +++ b/sherpa-onnx/jni/offline-recognizer.cc @@ -174,6 +174,26 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { ans.model_config.whisper.tail_paddings = env->GetIntField(whisper_config, fid); + // FireRedAsr + fid = env->GetFieldID(model_config_cls, "fireRedAsr", + "Lcom/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig;"); + jobject fire_red_asr_config = env->GetObjectField(model_config, fid); + jclass fire_red_asr_config_cls = env->GetObjectClass(fire_red_asr_config); + + fid = + env->GetFieldID(fire_red_asr_config_cls, "encoder", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(fire_red_asr_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model_config.fire_red_asr.encoder = p; + env->ReleaseStringUTFChars(s, p); + + fid = + env->GetFieldID(fire_red_asr_config_cls, "decoder", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(fire_red_asr_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model_config.fire_red_asr.decoder = p; + env->ReleaseStringUTFChars(s, p); + // moonshine fid = env->GetFieldID(model_config_cls, "moonshine", "Lcom/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig;"); diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index 3f329323..4c584710 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -33,6 +33,11 @@ data class OfflineWhisperModelConfig( var tailPaddings: Int = 1000, // Padding added at the end of the samples ) +data class OfflineFireRedAsrModelConfig( + var encoder: String = "", + var decoder: String = "", +) + data class OfflineMoonshineModelConfig( var preprocessor: String = "", var encoder: String = "", @@ -50,6 +55,7 @@ data class OfflineModelConfig( var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), + var fireRedAsr: OfflineFireRedAsrModelConfig = OfflineFireRedAsrModelConfig(), var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), @@ -464,6 +470,17 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { modelType = "transducer", ) } + + 24 -> { + val modelDir = "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16" + return OfflineModelConfig( + fireRedAsr = OfflineFireRedAsrModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } } return null }