C# API for speaker diarization (#1407)
This commit is contained in:
83
dotnet-examples/offline-speaker-diarization/Program.cs
Normal file
83
dotnet-examples/offline-speaker-diarization/Program.cs
Normal file
@@ -0,0 +1,83 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
|
||||
// This file shows how to use sherpa-onnx C# API for speaker diarization
|
||||
/*
|
||||
Usage:
|
||||
|
||||
Step 1: Download a speaker segmentation model
|
||||
|
||||
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
|
||||
for a list of available models. The following is an example
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
|
||||
Step 2: Download a speaker embedding extractor model
|
||||
|
||||
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
|
||||
for a list of available models. The following is an example
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
|
||||
Step 3: Download test wave files
|
||||
|
||||
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
|
||||
for a list of available test wave files. The following is an example
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
|
||||
|
||||
Step 4: Run it
|
||||
|
||||
dotnet run
|
||||
*/
|
||||
|
||||
using SherpaOnnx;
|
||||
using System;
|
||||
|
||||
/// <summary>
/// Console demo: runs offline speaker diarization on one wave file and prints
/// one line per returned segment (start time, end time, speaker index).
/// </summary>
class OfflineSpeakerDiarizationDemo
{
    /// <summary>
    /// Entry point. Configures the segmentation and embedding models, checks
    /// that the wave file's sample rate matches what the diarizer expects,
    /// runs diarization with a progress callback, and prints the segments.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflineSpeakerDiarizationConfig();
        config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
        config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

        // The test wave ./0-four-speakers-zh.wav has 4 speakers, so
        // the number of clusters is set to 4.
        config.Clustering.NumClusters = 4;

        // If you don't know the number of speakers in the test wave file, please
        // use
        // config.Clustering.Threshold = 0.5; // You need to tune this threshold

        // `using` makes sure Dispose() runs on every exit path, including the
        // early return on a sample-rate mismatch below.
        using var sd = new OfflineSpeakerDiarization(config);

        var testWaveFile = "./0-four-speakers-zh.wav";
        var waveReader = new WaveReader(testWaveFile);

        if (sd.SampleRate != waveReader.SampleRate)
        {
            Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
            return;
        }

        Console.WriteLine("Started");

        // var segments = sd.Process(waveReader.Samples); // this one is also ok

        // Reports progress as a percentage with two decimals; always returns 0.
        static int OnProgress(int numProcessedChunks, int numTotalChunks, IntPtr arg)
        {
            float progress = 100.0F * numProcessedChunks / numTotalChunks;
            Console.WriteLine($"Progress {progress:0.00}%");
            return 0;
        }

        // Keep the delegate in a local so it stays referenced for the whole call.
        var callback = new OfflineSpeakerDiarizationProgressCallback(OnProgress);
        var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);

        foreach (var s in segments)
        {
            Console.WriteLine($"{s.Start:0.00} -- {s.End:0.00} speaker_{s.Speaker}");
        }
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
<!-- Console example project for offline speaker diarization. -->
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <!-- Build an executable targeting .NET 6. -->
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>offline_speaker_diarization</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <!-- Reference to the sibling Common project. -->
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>
|
||||
18
dotnet-examples/offline-speaker-diarization/run.sh
Executable file
18
dotnet-examples/offline-speaker-diarization/run.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the segmentation model, the speaker embedding model and the test
# wave file if they are not already present, then runs the example.

# Stop at the first failing command so `dotnet run` is never started with
# missing or partially downloaded files.
set -euo pipefail

# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page under the model's file name (which would satisfy the -f checks below
# on the next run).
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dotnet run
|
||||
@@ -31,6 +31,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-micro
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -93,6 +95,10 @@ Global
|
||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
Reference in New Issue
Block a user