Go API for speaker diarization (#1403)
This commit is contained in:
3
go-api-examples/non-streaming-speaker-diarization/go.mod
Normal file
3
go-api-examples/non-streaming-speaker-diarization/go.mod
Normal file
@@ -0,0 +1,3 @@
|
||||
module non-streaming-speaker-diarization
|
||||
|
||||
go 1.12
|
||||
87
go-api-examples/non-streaming-speaker-diarization/main.go
Normal file
87
go-api-examples/non-streaming-speaker-diarization/main.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
|
||||
"log"
|
||||
)
|
||||
|
||||
/*
|
||||
Usage:
|
||||
|
||||
Step 1: Download a speaker segmentation model
|
||||
|
||||
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
|
||||
for a list of available models. The following is an example
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
|
||||
Step 2: Download a speaker embedding extractor model
|
||||
|
||||
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
|
||||
for a list of available models. The following is an example
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
|
||||
Step 3: Download test wave files
|
||||
|
||||
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
|
||||
for a list of available test wave files. The following is an example
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
|
||||
|
||||
Step 4: Run it with `go mod tidy && go build && ./non-streaming-speaker-diarization`
|
||||
*/
|
||||
|
||||
func initSpeakerDiarization() *sherpa.OfflineSpeakerDiarization {
|
||||
config := sherpa.OfflineSpeakerDiarizationConfig{}
|
||||
|
||||
config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
|
||||
config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
|
||||
|
||||
// The test wave file contains 4 speakers, so we use 4 here
|
||||
config.Clustering.NumClusters = 4
|
||||
|
||||
// if you don't know the actual numbers in the wave file,
|
||||
// then please don't set NumClusters; you need to use
|
||||
//
|
||||
// config.Clustering.Threshold = 0.5
|
||||
//
|
||||
|
||||
// A larger Threshold leads to fewer clusters
|
||||
// A smaller Threshold leads to more clusters
|
||||
|
||||
sd := sherpa.NewOfflineSpeakerDiarization(&config)
|
||||
return sd
|
||||
}
|
||||
|
||||
func main() {
|
||||
wave_filename := "./0-four-speakers-zh.wav"
|
||||
wave := sherpa.ReadWave(wave_filename)
|
||||
if wave == nil {
|
||||
log.Printf("Failed to read %v", wave_filename)
|
||||
return
|
||||
}
|
||||
|
||||
sd := initSpeakerDiarization()
|
||||
if sd == nil {
|
||||
log.Printf("Please check your config")
|
||||
return
|
||||
}
|
||||
|
||||
defer sherpa.DeleteOfflineSpeakerDiarization(sd)
|
||||
|
||||
if wave.SampleRate != sd.SampleRate() {
|
||||
log.Printf("Expected sample rate: %v, given: %d\n", sd.SampleRate(), wave.SampleRate)
|
||||
return
|
||||
}
|
||||
|
||||
log.Println("Started")
|
||||
segments := sd.Process(wave.Samples)
|
||||
n := len(segments)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
log.Printf("%.3f -- %.3f speaker_%02d\n", segments[i].Start, segments[i].End, segments[i].Speaker)
|
||||
}
|
||||
}
|
||||
20
go-api-examples/non-streaming-speaker-diarization/run.sh
Executable file
20
go-api-examples/non-streaming-speaker-diarization/run.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
#
# Fetches the segmentation model, the speaker embedding model and the test
# wave file used by this example (each only if it is not already present),
# then builds and runs the example binary.

# Stop at the first failing command so a broken download or build does not
# fall through to the later steps.
set -e

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  # -f: fail on HTTP errors instead of saving the server's error page.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  # Without -f an error response would be stored under the model's filename
  # and the existence check above would skip the download on later runs.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

go mod tidy
go build
./non-streaming-speaker-diarization
|
||||
Reference in New Issue
Block a user