Add Java and Kotlin API for NeMo Canary models (#2359)

Add support for the NeMo Canary model in both Java and Kotlin APIs, wiring it through
JNI and updating examples and CI.

- Introduce OfflineCanaryModelConfig in Kotlin and Java with builder patterns
- Extend OfflineRecognizer to accept and apply the new canary config via setConfig
- Update the JNI binding (GetOfflineConfig) and the getOfflineModelConfig mapping (type 32),
  plus examples and CI workflows
This commit is contained in:
Fangjun Kuang
2025-07-08 13:45:26 +08:00
committed by GitHub
parent df4615ca1d
commit 103e93d9f6
12 changed files with 363 additions and 11 deletions

View File

@@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() {
ls -lh $out_filename
java -Djava.library.path=../build/lib -jar $out_filename
}
testVersion
# Build the NeMo Canary offline ASR Kotlin example into a runnable jar and run it.
# The int8 Canary model archive is downloaded and unpacked first, unless its
# encoder file is already present in the working directory.
function testOfflineNeMoCanary() {
  local model_dir=sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
  local archive=${model_dir}.tar.bz2

  # The encoder file doubles as the "model already extracted" marker.
  if [ ! -f ${model_dir}/encoder.int8.onnx ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${archive}
    tar xvf ${archive}
    rm ${archive}
  fi

  out_filename=test_offline_nemo_canary.jar

  # Compile the example together with the API sources it is built against.
  kotlinc-jvm -include-runtime -d $out_filename \
    test_offline_nemo_canary.kt \
    FeatureConfig.kt \
    HomophoneReplacerConfig.kt \
    OfflineRecognizer.kt \
    OfflineStream.kt \
    WaveReader.kt \
    faked-asset-manager.kt

  ls -lh $out_filename

  # The native library is looked up in ../build/lib at run time.
  java -Djava.library.path=../build/lib -jar $out_filename
}
# Invoke the example tests; they execute in the order listed.
# NOTE(review): testVersion is commented out below — confirm this is intentional
# and not a debugging leftover.
# testVersion
testOfflineNeMoCanary
testOfflineSenseVoiceWithHr
testOfflineSpeechDenoiser
testOfflineSpeakerDiarization

View File

@@ -0,0 +1,48 @@
package com.k2fsa.sherpa.onnx
/**
 * Example: offline recognition with a NeMo Canary model.
 *
 * Decodes the bundled `en.wav` test file twice with the same recognizer:
 * first with the recognizer's initial configuration, then again after the
 * Canary target language has been switched to German (`"de"`) through
 * [OfflineRecognizer.setConfig]. Each result is printed to standard output.
 *
 * Every stream, and the recognizer itself, is released in a `finally` block so
 * the native handles are freed even if reading the wave or decoding throws.
 */
fun main() {
    val recognizer = createOfflineRecognizer()
    try {
        val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

        // readWaveFromFile returns an untyped array: [0] = samples, [1] = sample rate.
        val objArray = WaveReader.readWaveFromFile(
            filename = waveFilename,
        )
        val samples: FloatArray = objArray[0] as FloatArray
        val sampleRate: Int = objArray[1] as Int

        // One full decode pass on a fresh stream, using whatever configuration
        // the recognizer currently holds. Prints "<label>: <result>".
        fun decodeAndPrint(label: String) {
            val stream = recognizer.createStream()
            try {
                stream.acceptWaveform(samples, sampleRate = sampleRate)
                recognizer.decode(stream)
                val result = recognizer.getResult(stream)
                println("$label: $result")
            } finally {
                stream.release()
            }
        }

        decodeAndPrint("English")

        // Now output text in German: copy the current config, changing only
        // the Canary target language, and apply it to the live recognizer.
        val baseConfig = recognizer.config
        val germanModelConfig = baseConfig.modelConfig.copy(
            canary = baseConfig.modelConfig.canary.copy(tgtLang = "de"),
        )
        recognizer.setConfig(baseConfig.copy(modelConfig = germanModelConfig))

        decodeAndPrint("German")
    } finally {
        recognizer.release()
    }
}
/**
 * Creates the [OfflineRecognizer] used by this example.
 *
 * The model configuration comes from `getOfflineModelConfig(type = 32)`, which
 * this change's description identifies as the NeMo Canary mapping.
 *
 * @return a recognizer built from that model configuration; the caller is
 *   responsible for calling `release()` on it.
 * @throws IllegalStateException if no model configuration exists for type 32.
 */
fun createOfflineRecognizer(): OfflineRecognizer {
    // Fail with an explicit message instead of a bare NullPointerException
    // when the type -> model mapping has no entry for 32.
    val modelConfig = checkNotNull(getOfflineModelConfig(type = 32)) {
        "getOfflineModelConfig(type = 32) returned null; NeMo Canary model config is unavailable"
    }
    val config = OfflineRecognizerConfig(
        modelConfig = modelConfig,
    )
    return OfflineRecognizer(config = config)
}