diff --git a/.github/workflows/apk-vad-asr.yaml b/.github/workflows/apk-vad-asr.yaml index 4c587f1b..fe706aa1 100644 --- a/.github/workflows/apk-vad-asr.yaml +++ b/.github/workflows/apk-vad-asr.yaml @@ -23,8 +23,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - total: ["5"] - index: ["0", "1", "2", "3", "4"] + total: ["10"] + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] steps: - uses: actions/checkout@v4 @@ -165,6 +165,7 @@ jobs: git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface + du -h -d1 . git fetch git pull git merge -m "merge remote" --ff origin main diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 5759ea5d..cca94422 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -107,6 +107,31 @@ jobs: make -j4 ls -lh lib + - name: Run java test (Non-Streaming ASR) + shell: bash + run: | + cd ./java-api-examples + + ./run-non-streaming-decode-file-moonshine.sh + rm -rf sherpa-onnx-moonshine-* + + ./run-non-streaming-decode-file-sense-voice.sh + rm -rf sherpa-onnx-sense-voice-* + + ./run-inverse-text-normalization-paraformer.sh + + ./run-non-streaming-decode-file-paraformer.sh + rm -rf sherpa-onnx-paraformer-zh-* + + ./run-non-streaming-decode-file-transducer.sh + rm -rf sherpa-onnx-zipformer-* + + ./run-non-streaming-decode-file-whisper.sh + rm -rf sherpa-onnx-whisper-* + + ./run-non-streaming-decode-file-nemo.sh + rm -rf sherpa-onnx-nemo-* + - name: Run java test (speaker diarization) shell: bash run: | @@ -206,28 +231,6 @@ jobs: ./run-streaming-decode-file-transducer.sh rm -rf sherpa-onnx-streaming-* - - name: Run java test (Non-Streaming ASR) - shell: bash - run: | - cd ./java-api-examples - - ./run-non-streaming-decode-file-sense-voice.sh - rm -rf sherpa-onnx-sense-voice-* - - ./run-inverse-text-normalization-paraformer.sh - - ./run-non-streaming-decode-file-paraformer.sh - rm -rf 
sherpa-onnx-paraformer-zh-* - - ./run-non-streaming-decode-file-transducer.sh - rm -rf sherpa-onnx-zipformer-* - - ./run-non-streaming-decode-file-whisper.sh - rm -rf sherpa-onnx-whisper-* - - ./run-non-streaming-decode-file-nemo.sh - rm -rf sherpa-onnx-nemo-* - - name: Run java test (Non-Streaming TTS) shell: bash run: | diff --git a/.gitignore b/.gitignore index 7e6708be..18178f5f 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,5 @@ sherpa-onnx-online-punct-en-2024-08-06 *.mp4 *.mp3 sherpa-onnx-pyannote-segmentation-3-0 +sherpa-onnx-moonshine-tiny-en-int8 +sherpa-onnx-moonshine-base-en-int8 diff --git a/java-api-examples/NonStreamingDecodeFileMoonshine.java b/java-api-examples/NonStreamingDecodeFileMoonshine.java new file mode 100644 index 00000000..232ceb69 --- /dev/null +++ b/java-api-examples/NonStreamingDecodeFileMoonshine.java @@ -0,0 +1,60 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use an offline Moonshine, +// i.e., non-streaming Moonshine model, +// to decode files. 
+import com.k2fsa.sherpa.onnx.*;
+
+public class NonStreamingDecodeFileMoonshine {
+  /**
+   * Decodes one test wave with the int8 Moonshine tiny model and prints the recognized text.
+   */
+  public static void main(String[] args) {
+    // Download instructions for the model files used below:
+    // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
+    String preprocessModel = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+    String encoderModel = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+    String uncachedDecoderModel =
+        "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+    String cachedDecoderModel = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+
+    String tokenTable = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+    String wavePath = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
+
+    // Open the test wave before the recognizer is created.
+    WaveReader wave = new WaveReader(wavePath);
+
+    // One nested builder expression: Moonshine files -> model config -> recognizer config.
+    OfflineRecognizerConfig recognizerConfig =
+        OfflineRecognizerConfig.builder()
+            .setOfflineModelConfig(
+                OfflineModelConfig.builder()
+                    .setMoonshine(
+                        OfflineMoonshineModelConfig.builder()
+                            .setPreprocessor(preprocessModel)
+                            .setEncoder(encoderModel)
+                            .setUncachedDecoder(uncachedDecoderModel)
+                            .setCachedDecoder(cachedDecoderModel)
+                            .build())
+                    .setTokens(tokenTable)
+                    .setNumThreads(1)
+                    .setDebug(true)
+                    .build())
+            .setDecodingMethod("greedy_search")
+            .build();
+
+    OfflineRecognizer asr = new OfflineRecognizer(recognizerConfig);
+    OfflineStream utterance = asr.createStream();
+
+    // Feed the whole file at once, then decode it in a single call.
+    utterance.acceptWaveform(wave.getSamples(), wave.getSampleRate());
+    asr.decode(utterance);
+
+    String transcript = asr.getResult(utterance).getText();
+    System.out.printf("filename:%s\nresult:%s\n", wavePath, transcript);
+
+    // Release the stream first and the recognizer second.
+    utterance.release();
+    asr.release();
+  }
+}
diff --git a/java-api-examples/VadFromMicWithNonStreamingMoonshine.java b/java-api-examples/VadFromMicWithNonStreamingMoonshine.java
new file mode 100644
index 00000000..bfedf3fb
--- /dev/null
+++ b/java-api-examples/VadFromMicWithNonStreamingMoonshine.java
@@ -0,0 +1,178 @@
+// Copyright 2024 Xiaomi Corporation
+
+// This file shows how to use a silero_vad model with a non-streaming
+// Moonshine tiny for speech recognition.
+
+import com.k2fsa.sherpa.onnx.*;
+import javax.sound.sampled.*;
+
+// NOTE(review): the class name omits the "With" used in this file's name
+// (VadFromMicWithNonStreamingMoonshine.java); it is kept as-is here. Confirm
+// whether the two are meant to match.
+public class VadFromMicNonStreamingMoonshine {
+  private static final int sampleRate = 16000;
+  private static final int windowSize = 512;
+
+  /** Creates a silero VAD configured for {@code sampleRate} audio in {@code windowSize} chunks. */
+  public static Vad createVad() {
+    // please download ./silero_vad.onnx from
+    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+    String model = "./silero_vad.onnx";
+    SileroVadModelConfig sileroVad =
+        SileroVadModelConfig.builder()
+            .setModel(model)
+            .setThreshold(0.5f)
+            .setMinSilenceDuration(0.25f)
+            .setMinSpeechDuration(0.5f)
+            .setWindowSize(windowSize)
+            .build();
+
+    VadModelConfig config =
+        VadModelConfig.builder()
+            .setSileroVadModelConfig(sileroVad)
+            .setSampleRate(sampleRate)
+            .setNumThreads(1)
+            .setDebug(true)
+            .setProvider("cpu")
+            .build();
+
+    return new Vad(config);
+  }
+
+  /** Creates a non-streaming recognizer backed by the int8 Moonshine tiny English model. */
+  public static OfflineRecognizer createOfflineRecognizer() {
+    // please refer to
+    // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
+    // to download model files
+
+    String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+    String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+    String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+    String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+
+    String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+    OfflineMoonshineModelConfig moonshine =
+        OfflineMoonshineModelConfig.builder()
+            .setPreprocessor(preprocessor)
+            .setEncoder(encoder)
+            .setUncachedDecoder(uncachedDecoder)
+            .setCachedDecoder(cachedDecoder)
+            .build();
+
+    OfflineModelConfig modelConfig =
+        OfflineModelConfig.builder()
+            .setMoonshine(moonshine)
+            .setTokens(tokens)
+            .setNumThreads(1)
+            .setDebug(true)
+            .build();
+
+    OfflineRecognizerConfig config =
+        OfflineRecognizerConfig.builder()
+            .setOfflineModelConfig(modelConfig)
+            .setDecodingMethod("greedy_search")
+            .build();
+
+    return new OfflineRecognizer(config);
+  }
+
+  /**
+   * Converts one little-endian, signed 16-bit PCM sample to a float.
+   *
+   * @param low the least-significant byte of the sample
+   * @param high the most-significant byte of the sample (carries the sign)
+   * @return the sample divided by 32768
+   */
+  private static float pcm16LeToFloat(byte low, byte high) {
+    // Java bytes are signed. The low byte must be masked to 0..255, otherwise a
+    // low byte >= 0x80 sign-extends and makes the sample 256 too small.
+    int s = (high << 8) | (low & 0xff);
+    return (float) s / 32768;
+  }
+
+  /**
+   * Captures microphone audio, segments it with the VAD and prints the recognized text of each
+   * segment. Stops when a recognized segment contains "exit the program".
+   */
+  public static void main(String[] args) {
+    Vad vad = createVad();
+    OfflineRecognizer recognizer = createOfflineRecognizer();
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
+    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
+    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
+    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+    TargetDataLine targetDataLine;
+    try {
+      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
+      targetDataLine.open(format);
+      targetDataLine.start();
+    } catch (LineUnavailableException e) {
+      System.out.println("Failed to open target data line: " + e.getMessage());
+      vad.release();
+      recognizer.release();
+      return;
+    }
+
+    boolean printed = false;
+    byte[] buffer = new byte[windowSize * 2];
+    float[] samples = new float[windowSize];
+
+    System.out.println("Started. Please speak");
+    boolean running = true;
+    try {
+      while (targetDataLine.isOpen() && running) {
+        int n = targetDataLine.read(buffer, 0, buffer.length);
+        if (n <= 0) {
+          System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
+          continue;
+        }
+        for (int i = 0; i != windowSize; ++i) {
+          samples[i] = pcm16LeToFloat(buffer[2 * i], buffer[2 * i + 1]);
+        }
+
+        vad.acceptWaveform(samples);
+        if (vad.isSpeechDetected() && !printed) {
+          System.out.println("Detected speech");
+          printed = true;
+        }
+
+        if (!vad.isSpeechDetected()) {
+          printed = false;
+        }
+
+        while (!vad.empty()) {
+          SpeechSegment segment = vad.front();
+          float startTime = segment.getStart() / (float) sampleRate;
+          float duration = segment.getSamples().length / (float) sampleRate;
+
+          OfflineStream stream = recognizer.createStream();
+          stream.acceptWaveform(segment.getSamples(), sampleRate);
+          recognizer.decode(stream);
+          String text = recognizer.getResult(stream).getText();
+          stream.release();
+
+          if (!text.isEmpty()) {
+            System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
+          }
+
+          if (text.contains("exit the program")) {
+            running = false;
+          }
+
+          vad.pop();
+        }
+      }
+    } finally {
+      // Stop and close the capture line, then release the VAD and recognizer, even if the
+      // loop exits with an exception.
+      targetDataLine.stop();
+      targetDataLine.close();
+      vad.release();
+      recognizer.release();
+    }
+  }
+}
diff --git a/java-api-examples/run-non-streaming-decode-file-moonshine.sh b/java-api-examples/run-non-streaming-decode-file-moonshine.sh
new file mode 100755
index 00000000..b0a09db4
--- /dev/null
+++ b/java-api-examples/run-non-streaming-decode-file-moonshine.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+if [ !
-f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+# Quote $PWD so a checkout path containing spaces is not split into several words.
+java \
+  -Djava.library.path="$PWD/../build/lib" \
+  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+  NonStreamingDecodeFileMoonshine.java
diff --git a/java-api-examples/run-vad-from-mic-non-streaming-moonshine.sh b/java-api-examples/run-vad-from-mic-non-streaming-moonshine.sh
new file mode 100755
index 00000000..78147e7f
--- /dev/null
+++ b/java-api-examples/run-vad-from-mic-non-streaming-moonshine.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# Build the JNI library unless a .dylib or .so build of it already exists.
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+# Build the Java API jar if it is missing.
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+# Download the VAD model if it is missing.
+if [ ! -f ./silero_vad.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+# Download and unpack the Moonshine model if it is missing.
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+# Quote $PWD so a checkout path containing spaces is not split into several words.
+java \
+  -Djava.library.path="$PWD/../build/lib" \
+  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+  ./VadFromMicWithNonStreamingMoonshine.java
diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh
index a2adab41..3b3d1593 100755
--- a/kotlin-api-examples/run.sh
+++ b/kotlin-api-examples/run.sh
@@ -168,6 +168,12 @@ function testSpokenLanguageIdentification() {
 }
 
 function testOfflineAsr() {
+  if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+    tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+    rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  fi
+
   if [ !
-f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 diff --git a/kotlin-api-examples/test_offline_asr.kt b/kotlin-api-examples/test_offline_asr.kt index d0940656..6c2c10a6 100644 --- a/kotlin-api-examples/test_offline_asr.kt +++ b/kotlin-api-examples/test_offline_asr.kt @@ -1,7 +1,7 @@ package com.k2fsa.sherpa.onnx fun main() { - val types = arrayOf(0, 2, 5, 6, 15) + val types = arrayOf(0, 2, 5, 6, 15, 21) for (type in types) { test(type) } @@ -16,6 +16,7 @@ fun test(type: Int) { 5 -> "./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav" 6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav" 15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav" + 21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav" else -> null } diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index 7671e975..5c3694d6 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -369,6 +369,21 @@ def get_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-moonshine-tiny-en-int8", + idx=21, + lang="en", + short_name="moonshine_tiny_int8", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + + ls -lh + popd """, ), diff --git a/sherpa-onnx/java-api/Makefile b/sherpa-onnx/java-api/Makefile index 6e4778ae..8b9278fc 100644 --- a/sherpa-onnx/java-api/Makefile +++ b/sherpa-onnx/java-api/Makefile @@ -26,6 +26,7 @@ java_files += OnlineRecognizer.java java_files += OfflineTransducerModelConfig.java java_files += OfflineParaformerModelConfig.java java_files += OfflineWhisperModelConfig.java +java_files += OfflineMoonshineModelConfig.java java_files += OfflineNemoEncDecCtcModelConfig.java java_files += 
OfflineSenseVoiceModelConfig.java java_files += OfflineModelConfig.java diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java index 9e23a3a7..4d0192b6 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java @@ -6,6 +6,7 @@ public class OfflineModelConfig { private final OfflineTransducerModelConfig transducer; private final OfflineParaformerModelConfig paraformer; private final OfflineWhisperModelConfig whisper; + private final OfflineMoonshineModelConfig moonshine; private final OfflineNemoEncDecCtcModelConfig nemo; private final OfflineSenseVoiceModelConfig senseVoice; private final String teleSpeech; @@ -22,6 +23,7 @@ public class OfflineModelConfig { this.transducer = builder.transducer; this.paraformer = builder.paraformer; this.whisper = builder.whisper; + this.moonshine = builder.moonshine; this.nemo = builder.nemo; this.senseVoice = builder.senseVoice; this.teleSpeech = builder.teleSpeech; @@ -50,6 +52,10 @@ public class OfflineModelConfig { return whisper; } + public OfflineMoonshineModelConfig getMoonshine() { + return moonshine; + } + public OfflineSenseVoiceModelConfig getSenseVoice() { return senseVoice; } @@ -90,6 +96,7 @@ public class OfflineModelConfig { private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build(); private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build(); private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build(); + private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build(); private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build(); private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); private String 
teleSpeech = "";
@@ -135,6 +142,11 @@ public class OfflineModelConfig {
       return this;
     }
 
+    public Builder setMoonshine(OfflineMoonshineModelConfig moonshine) {
+      this.moonshine = moonshine;
+      return this;
+    }
+
     public Builder setTokens(String tokens) {
       this.tokens = tokens;
       return this;
diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig.java
new file mode 100644
index 00000000..1a324bba
--- /dev/null
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig.java
@@ -0,0 +1,104 @@
+// Copyright 2024 Xiaomi Corporation
+
+package com.k2fsa.sherpa.onnx;
+
+import java.util.Objects;
+
+/**
+ * Paths of the four model files used by an offline (non-streaming) Moonshine model.
+ *
+ * <p>Instances are immutable and created through {@link #builder()}. Every path defaults to the
+ * empty string and is never {@code null}: the JNI layer converts each field with
+ * {@code GetStringUTFChars} without a null check, so the setters reject {@code null} up front.
+ */
+public class OfflineMoonshineModelConfig {
+  private final String preprocessor;
+  private final String encoder;
+  private final String uncachedDecoder;
+  private final String cachedDecoder;
+
+  private OfflineMoonshineModelConfig(Builder builder) {
+    this.preprocessor = builder.preprocessor;
+    this.encoder = builder.encoder;
+    this.uncachedDecoder = builder.uncachedDecoder;
+    this.cachedDecoder = builder.cachedDecoder;
+  }
+
+  /** Returns a builder whose paths are all initially empty. */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /** Returns the path of the preprocessor model. */
+  public String getPreprocessor() {
+    return preprocessor;
+  }
+
+  /** Returns the path of the encoder model. */
+  public String getEncoder() {
+    return encoder;
+  }
+
+  /** Returns the path of the uncached decoder model. */
+  public String getUncachedDecoder() {
+    return uncachedDecoder;
+  }
+
+  /** Returns the path of the cached decoder model. */
+  public String getCachedDecoder() {
+    return cachedDecoder;
+  }
+
+  /** Fluent builder for {@link OfflineMoonshineModelConfig}. */
+  public static class Builder {
+    private String preprocessor = "";
+    private String encoder = "";
+    private String uncachedDecoder = "";
+    private String cachedDecoder = "";
+
+    /** Creates the immutable config from the values set so far. */
+    public OfflineMoonshineModelConfig build() {
+      return new OfflineMoonshineModelConfig(this);
+    }
+
+    /**
+     * Sets the path of the preprocessor model.
+     *
+     * @throws NullPointerException if {@code preprocessor} is {@code null}
+     */
+    public Builder setPreprocessor(String preprocessor) {
+      this.preprocessor = Objects.requireNonNull(preprocessor, "preprocessor");
+      return this;
+    }
+
+    /**
+     * Sets the path of the encoder model.
+     *
+     * @throws NullPointerException if {@code encoder} is {@code null}
+     */
+    public Builder setEncoder(String encoder) {
+      this.encoder = Objects.requireNonNull(encoder, "encoder");
+      return this;
+    }
+
+    /**
+     * Sets the path of the uncached decoder model.
+     *
+     * @throws NullPointerException if {@code uncachedDecoder} is {@code null}
+     */
+    public Builder setUncachedDecoder(String uncachedDecoder) {
+      this.uncachedDecoder = Objects.requireNonNull(uncachedDecoder, "uncachedDecoder");
+      return this;
+    }
+
+    /**
+     * Sets the path of the cached decoder model.
+     *
+     * @throws NullPointerException if {@code cachedDecoder} is {@code null}
+     */
+    public Builder setCachedDecoder(String cachedDecoder) {
+      this.cachedDecoder = Objects.requireNonNull(cachedDecoder, "cachedDecoder");
+      return this;
+    }
+  }
+}
diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc
index 5e4b359b..7df79f34 100644
--- a/sherpa-onnx/jni/offline-recognizer.cc
+++ b/sherpa-onnx/jni/offline-recognizer.cc
@@ -174,6 +174,39 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
   ans.model_config.whisper.tail_paddings =
       env->GetIntField(whisper_config, fid);
 
+  // moonshine
+  fid = env->GetFieldID(model_config_cls, "moonshine",
+                        "Lcom/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig;");
+  jobject moonshine_config = env->GetObjectField(model_config, fid);
+  jclass moonshine_config_cls = env->GetObjectClass(moonshine_config);
+
+  fid = env->GetFieldID(moonshine_config_cls, "preprocessor",
+                        "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(moonshine_config, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.model_config.moonshine.preprocessor = p;
+  env->ReleaseStringUTFChars(s, p);
+
+  fid = env->GetFieldID(moonshine_config_cls, "encoder", "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(moonshine_config, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.model_config.moonshine.encoder = p;
+  env->ReleaseStringUTFChars(s, p);
+
+  fid = env->GetFieldID(moonshine_config_cls, "uncachedDecoder",
+                        "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(moonshine_config, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.model_config.moonshine.uncached_decoder = p;
+  env->ReleaseStringUTFChars(s, p);
+
+  fid = env->GetFieldID(moonshine_config_cls, "cachedDecoder",
+                        "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(moonshine_config, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.model_config.moonshine.cached_decoder = p;
+  env->ReleaseStringUTFChars(s, p);
+
   // sense voice
   fid = env->GetFieldID(model_config_cls,
"senseVoice", "Lcom/k2fsa/sherpa/onnx/OfflineSenseVoiceModelConfig;"); diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index b8243635..76dc8214 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -33,6 +33,13 @@ data class OfflineWhisperModelConfig( var tailPaddings: Int = 1000, // Padding added at the end of the samples ) +data class OfflineMoonshineModelConfig( + var preprocessor: String = "", + var encoder: String = "", + var uncachedDecoder: String = "", + var cachedDecoder: String = "", +) + data class OfflineSenseVoiceModelConfig( var model: String = "", var language: String = "", @@ -43,6 +50,7 @@ data class OfflineModelConfig( var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), + var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(), var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(), var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), var teleSpeech: String = "", @@ -417,6 +425,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { modelType = "nemo_transducer", ) } + + 21 -> { + val modelDir = "sherpa-onnx-moonshine-tiny-en-int8" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + preprocessor = "$modelDir/preprocess.onnx", + encoder = "$modelDir/encode.int8.onnx", + uncachedDecoder = "$modelDir/uncached_decode.int8.onnx", + cachedDecoder = "$modelDir/cached_decode.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } } return null }