Add JNI support for spoken language identification (#782)

This commit is contained in:
Fangjun Kuang
2024-04-17 19:27:15 +08:00
committed by GitHub
parent 69440e481f
commit 3a43049ba1
8 changed files with 189 additions and 42 deletions

View File

@@ -7,6 +7,7 @@ fun callback(samples: FloatArray): Unit {
}
fun main() {
testSpokenLanguageIdentifcation()
testAudioTagging()
testSpeakerRecognition()
testTts()
@@ -14,6 +15,41 @@ fun main() {
testAsr("zipformer2-ctc")
}
fun testSpokenLanguageIdentifcation() {
val config = SpokenLanguageIdentificationConfig(
whisper = SpokenLanguageIdentificationWhisperConfig(
encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
tailPaddings = 33,
),
numThreads=1,
debug=true,
provider="cpu",
)
val slid = SpokenLanguageIdentification(assetManager=null, config=config)
val testFiles = arrayOf(
"./spoken-language-identification-test-wavs/ar-arabic.wav",
"./spoken-language-identification-test-wavs/bg-bulgarian.wav",
"./spoken-language-identification-test-wavs/de-german.wav",
)
for (waveFilename in testFiles) {
val objArray = WaveReader.readWaveFromFile(
filename = waveFilename,
)
val samples: FloatArray = objArray[0] as FloatArray
val sampleRate: Int = objArray[1] as Int
val stream = slid.createStream()
stream.acceptWaveform(samples, sampleRate = sampleRate)
val lang = slid.compute(stream)
stream.release()
println(waveFilename)
println(lang)
}
}
fun testAudioTagging() {
val config = AudioTaggingConfig(
model=AudioTaggingModelConfig(