Add Android demo for speaker recognition (#536)
See pre-built Android APKs at https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
This commit is contained in:
@@ -7,11 +7,67 @@ fun callback(samples: FloatArray): Unit {
|
||||
}
|
||||
|
||||
/**
 * Entry point: runs the speaker-recognition test, then the TTS test, and
 * finally the ASR test once for each listed model kind, in that order.
 */
fun main() {
    testSpeakerRecognition()
    testTts()

    // ASR is exercised once per model kind, in the order listed here.
    val asrModelKinds = listOf("transducer", "zipformer2-ctc")
    for (kind in asrModelKinds) {
        testAsr(kind)
    }
}
|
||||
|
||||
/**
 * Reads the wave file at [filename] and computes an embedding for it with
 * [extractor].
 *
 * A stream is created from the extractor, fed the whole waveform, and marked
 * as finished before the embedding is computed. The stream is released before
 * this function exits, whether it returns normally or throws.
 *
 * @param extractor extractor used to create the stream and compute the embedding
 * @param filename path of the wave file to read
 * @return the embedding produced by `extractor.compute`
 * @throws IllegalStateException if the extractor reports the stream as not
 *   ready after all input has been supplied
 */
fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
    val objArray = WaveReader.readWaveFromFile(
        filename = filename,
    )
    // Element 0 is used as the samples, element 1 as the sample rate.
    val samples: FloatArray = objArray[0] as FloatArray
    val sampleRate: Int = objArray[1] as Int

    val stream = extractor.createStream()
    try {
        stream.acceptWaveform(sampleRate = sampleRate, samples = samples)
        stream.inputFinished()
        check(extractor.isReady(stream)) {
            "extractor is not ready after feeding all samples from $filename"
        }

        return extractor.compute(stream)
    } finally {
        // Release the stream even if the readiness check or compute() throws.
        stream.release()
    }
}
|
||||
|
||||
/**
 * Smoke test for speaker embedding extraction and speaker lookup.
 *
 * Computes embeddings for three wave files (two from "speaker1", one from
 * "speaker2") and then runs two scenarios:
 *
 * 1. Register one embedding per speaker and verify that the second
 *    "speaker1" recording is matched to "speaker1".
 * 2. Register both "speaker1" embeddings under the single name "s1" and
 *    verify that a "speaker1" embedding matches "s1" while the "speaker2"
 *    embedding yields an empty name.
 *
 * All searches use a threshold of 0.5.
 *
 * @throws IllegalStateException if any registration or lookup expectation fails
 */
fun testSpeakerRecognition() {
    val config = SpeakerEmbeddingExtractorConfig(
        model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
    )
    val extractor = SpeakerEmbeddingExtractor(config = config)

    val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
    val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
    val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")

    // Scenario 1: one embedding per registered speaker.
    val singleManager = SpeakerEmbeddingManager(extractor.dim())
    var ok = singleManager.add(name = "speaker1", embedding = embedding1a)
    check(ok) { "failed to add speaker1" }

    // Capture the result of this add too; otherwise the check below would
    // only re-test the result of the "speaker1" registration.
    ok = singleManager.add(name = "speaker2", embedding = embedding2a)
    check(ok) { "failed to add speaker2" }

    var name = singleManager.search(embedding = embedding1b, threshold = 0.5f)
    check(name == "speaker1") { "expected speaker1 but got '$name'" }

    singleManager.release()

    // Scenario 2: several embeddings registered under one name.
    val multiManager = SpeakerEmbeddingManager(extractor.dim())
    val embeddingList = mutableListOf(embedding1a, embedding1b)
    ok = multiManager.add(name = "s1", embedding = embeddingList.toTypedArray())
    check(ok) { "failed to add s1" }

    name = multiManager.search(embedding = embedding1b, threshold = 0.5f)
    check(name == "s1") { "expected s1 but got '$name'" }

    // The speaker2 embedding was not registered here, so an empty name is expected.
    name = multiManager.search(embedding = embedding2a, threshold = 0.5f)
    check(name.isEmpty()) { "expected no match but got '$name'" }

    multiManager.release()
}
|
||||
|
||||
fun testTts() {
|
||||
// see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
|
||||
|
||||
Reference in New Issue
Block a user