Add JNI support for spoken language identification (#782)

2024-04-17 19:27:15 +08:00
parent 69440e481f
commit 3a43049ba1
8 changed files with 189 additions and 42 deletions
--- a/kotlin-api-examples/Main.kt
+++ b/kotlin-api-examples/Main.kt
@@ -7,6 +7,7 @@ fun callback(samples: FloatArray): Unit {
 }

 fun main() {
+  testSpokenLanguageIdentifcation()
  testAudioTagging()
  testSpeakerRecognition()
  testTts()
@@ -14,6 +15,41 @@ fun main() {
  testAsr("zipformer2-ctc")
 }

+/**
+ * Runs spoken language identification over a fixed list of test wave files
+ * using the int8 Whisper tiny encoder/decoder, printing each file name
+ * followed by the value returned by `compute` for that file.
+ *
+ * NOTE: the spelling of the function name ("Identifcation") is kept as-is
+ * because `main()` calls it by this name.
+ */
+fun testSpokenLanguageIdentifcation() {
+  val config = SpokenLanguageIdentificationConfig(
+    whisper = SpokenLanguageIdentificationWhisperConfig(
+      encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
+      decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
+      tailPaddings = 33,
+    ),
+    numThreads=1,
+    debug=true,
+    provider="cpu",
+  )
+  // No asset manager is passed here; the model paths above are plain
+  // relative file paths.
+  val slid = SpokenLanguageIdentification(assetManager=null, config=config)
+
+  val testFiles = arrayOf(
+    "./spoken-language-identification-test-wavs/ar-arabic.wav",
+    "./spoken-language-identification-test-wavs/bg-bulgarian.wav",
+    "./spoken-language-identification-test-wavs/de-german.wav",
+  )
+
+  for (waveFilename in testFiles) {
+    // readWaveFromFile returns an untyped array: element 0 is cast to the
+    // samples and element 1 to the sample rate.
+    val objArray = WaveReader.readWaveFromFile(
+        filename = waveFilename,
+    )
+    val samples: FloatArray = objArray[0] as FloatArray
+    val sampleRate: Int = objArray[1] as Int
+
+    val stream = slid.createStream()
+    // Release the stream even when acceptWaveform/compute throws, so a
+    // failure on one file does not leave the stream unreleased.
+    // NOTE(review): presumably release() frees a native (JNI) handle - confirm.
+    val lang = try {
+      stream.acceptWaveform(samples, sampleRate = sampleRate)
+      slid.compute(stream)
+    } finally {
+      stream.release()
+    }
+    println(waveFilename)
+    println(lang)
+  }
+}
+
 fun testAudioTagging() {
  val config = AudioTaggingConfig(
      model=AudioTaggingModelConfig(