Kotlin API for speaker diarization (#1415)
This commit is contained in:
1
kotlin-api-examples/OfflineSpeakerDiarization.kt
Symbolic link
1
kotlin-api-examples/OfflineSpeakerDiarization.kt
Symbolic link
@@ -0,0 +1 @@
|
||||
../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
|
||||
@@ -285,6 +285,37 @@ function testPunctuation() {
|
||||
java -Djava.library.path=../build/lib -jar $out_filename
|
||||
}
|
||||
|
||||
# Build and run the Kotlin offline speaker diarization example.
#
# Downloads the segmentation model, the speaker embedding model and the test
# wave file into the current directory when they are missing, compiles the
# example into a runnable jar with kotlinc-jvm, and runs it with the native
# libraries looked up in ../build/lib.
function testOfflineSpeakerDiarization() {
  local segmentation_archive=sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

  # Segmentation model: download, unpack, then drop the archive.
  [ -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ] || {
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf "$segmentation_archive"
    rm "$segmentation_archive"
  }

  # Speaker embedding model.
  # NOTE(review): "recongition" is spelled exactly as in the original URL;
  # verify against the release page before "correcting" it.
  [ -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ] || {
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  }

  # Test recording used by the example.
  [ -f ./0-four-speakers-zh.wav ] || {
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
  }

  out_filename=test_offline_speaker_diarization.jar

  # Compile the example together with the API sources it depends on.
  kotlinc-jvm -include-runtime -d "$out_filename" \
    test_offline_speaker_diarization.kt \
    OfflineSpeakerDiarization.kt \
    Speaker.kt \
    OnlineStream.kt \
    WaveReader.kt \
    faked-asset-manager.kt \
    faked-log.kt

  ls -lh "$out_filename"

  java -Djava.library.path=../build/lib -jar "$out_filename"
}
|
||||
|
||||
testOfflineSpeakerDiarization
|
||||
testSpeakerEmbeddingExtractor
|
||||
testOnlineAsr
|
||||
testTts
|
||||
|
||||
53
kotlin-api-examples/test_offline_speaker_diarization.kt
Normal file
53
kotlin-api-examples/test_offline_speaker_diarization.kt
Normal file
@@ -0,0 +1,53 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
/** Entry point of the example: runs the offline speaker diarization test. */
fun main() = testOfflineSpeakerDiarization()
|
||||
|
||||
/**
 * Progress callback: prints how much of the input has been processed, as a
 * percentage with two decimals.
 *
 * @param numProcessedChunks number of chunks processed so far
 * @param numTotalChunks total number of chunks to process
 * @param arg opaque user argument; not used by this callback
 * @return always 0
 */
fun callback(numProcessedChunks: Int, numTotalChunks: Int, arg: Long): Int {
    // Guard the division: with no chunks the float division would yield
    // NaN/Infinity and the message would read "Progress: NaN%".
    val progress = if (numTotalChunks > 0) {
        numProcessedChunks.toFloat() / numTotalChunks * 100
    } else {
        0f
    }
    val s = "%.2f".format(progress)
    println("Progress: ${s}%")

    return 0
}
|
||||
|
||||
/**
 * Runs offline speaker diarization on `./0-four-speakers-zh.wav` and prints
 * one line per resulting segment in the form `start -- end speaker_N`.
 *
 * The segmentation model, the embedding model and the wave file are read
 * from paths relative to the current working directory. If the wave file's
 * sample rate differs from the one reported by the diarizer, a message is
 * printed and the function returns without processing.
 */
fun testOfflineSpeakerDiarization() {
    // Built once and never reassigned, hence `val`.
    val config = OfflineSpeakerDiarizationConfig(
        segmentation = OfflineSpeakerSegmentationModelConfig(
            pyannote = OfflineSpeakerSegmentationPyannoteModelConfig("./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"),
        ),
        embedding = SpeakerEmbeddingExtractorConfig(
            model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx",
        ),

        // The test wave file ./0-four-speakers-zh.wav contains four speakers, so
        // we use numClusters=4 here. If you don't know the number of speakers
        // in the test wave file, please set the threshold like below.
        //
        // clustering=FastClusteringConfig(threshold=0.5),
        //
        // WARNING: You need to tune the threshold yourself.
        // A larger threshold leads to fewer clusters, i.e., fewer speakers.
        // A smaller threshold leads to more clusters, i.e., more speakers.
        //
        clustering = FastClusteringConfig(numClusters = 4),
    )

    val sd = OfflineSpeakerDiarization(config = config)

    val waveData = WaveReader.readWave(
        filename = "./0-four-speakers-zh.wav",
    )

    // Query the expected rate once so the check and the message agree.
    val expectedSampleRate = sd.sampleRate()
    if (expectedSampleRate != waveData.sampleRate) {
        println("Expected sample rate: ${expectedSampleRate}, given: ${waveData.sampleRate}")
        return
    }

    // val segments = sd.process(waveData.samples) // this one is also ok
    val segments = sd.processWithCallback(waveData.samples, callback = ::callback)
    for (segment in segments) {
        println("${segment.start} -- ${segment.end} speaker_${segment.speaker}")
    }
}
|
||||
Reference in New Issue
Block a user