Add Speaker ID demo for C# (#862)
This commit is contained in:
@@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -54,5 +56,9 @@ Global
|
||||
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
||||
155
dotnet-examples/speaker-identification/Program.cs
Normal file
155
dotnet-examples/speaker-identification/Program.cs
Normal file
@@ -0,0 +1,155 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to do speaker identification with sherpa-onnx.
|
||||
//
|
||||
// 1. Download a model from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
//
|
||||
// 2. Download test data from
|
||||
//
|
||||
// git clone https://github.com/csukuangfj/sr-data
|
||||
//
|
||||
// 3. Now run it
|
||||
//
|
||||
// dotnet run
|
||||
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
/// <summary>
/// Console demo: enrolls two speakers from reference recordings, identifies
/// the speaker of several test recordings, and exercises the verify/remove
/// operations of <c>SpeakerEmbeddingManager</c>.
/// </summary>
class SpeakerIdentificationDemo
{
    /// <summary>
    /// Computes the speaker embedding of a single wave file.
    /// </summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the wave file to read.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c> for the whole file.</returns>
    public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
    {
        WaveReader reader = new WaveReader(filename);

        // Dispose the stream as soon as the embedding has been computed rather
        // than leaving its cleanup to the garbage collector.
        using OnlineStream stream = extractor.CreateStream();
        stream.AcceptWaveform(reader.SampleRate, reader.Samples);
        stream.InputFinished();

        return extractor.Compute(stream);
    }

    /// <summary>
    /// Computes one embedding per file, in the same order as <paramref name="files"/>.
    /// </summary>
    private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] files)
    {
        return Array.ConvertAll(files, file => ComputeEmbedding(extractor, file));
    }

    /// <summary>
    /// Reports a failed check on standard output and marks the process as
    /// failed, so that callers such as run.sh can detect the failure.
    /// </summary>
    private static void Fail(string message)
    {
        Console.WriteLine(message);
        Environment.ExitCode = 1;
    }

    /// <summary>
    /// Entry point. Expects the model file and the <c>sr-data</c> directory in
    /// the current working directory. The process exit code is 1 if any of the
    /// checks below fails.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
        config.Debug = 1;

        // Both objects are disposed when Main returns, on every exit path.
        using var extractor = new SpeakerEmbeddingExtractor(config);
        using var manager = new SpeakerEmbeddingManager(extractor.Dim);

        string[] spk1Files =
        {
            "./sr-data/enroll/fangjun-sr-1.wav",
            "./sr-data/enroll/fangjun-sr-2.wav",
            "./sr-data/enroll/fangjun-sr-3.wav",
        };
        float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);

        string[] spk2Files =
        {
            "./sr-data/enroll/leijun-sr-1.wav",
            "./sr-data/enroll/leijun-sr-2.wav",
        };
        float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

        // Enroll each speaker with all of their reference embeddings.
        if (!manager.Add("fangjun", spk1Vec))
        {
            Fail("Failed to register fangjun");
            return;
        }

        if (!manager.Add("leijun", spk2Vec))
        {
            Fail("Failed to register leijun");
            return;
        }

        if (manager.NumSpeakers != 2)
        {
            Fail("There should be two speakers");
            return;
        }

        if (!manager.Contains("fangjun"))
        {
            Fail("It should contain the speaker fangjun");
            return;
        }

        if (!manager.Contains("leijun"))
        {
            Fail("It should contain the speaker leijun");
            return;
        }

        Console.WriteLine("---All speakers---");
        foreach (var speaker in manager.GetAllSpeakers())
        {
            Console.WriteLine(speaker);
        }
        Console.WriteLine("------------");

        string[] testFiles =
        {
            "./sr-data/test/fangjun-test-sr-1.wav",
            "./sr-data/test/leijun-test-sr-1.wav",
            "./sr-data/test/liudehua-test-sr-1.wav",
        };

        float threshold = 0.6f;
        foreach (var file in testFiles)
        {
            float[] embedding = ComputeEmbedding(extractor, file);

            // An empty result is reported as an unknown speaker.
            String name = manager.Search(embedding, threshold);
            if (string.IsNullOrEmpty(name))
            {
                name = "<Unknown>";
            }
            Console.WriteLine($"{file}: {name}");
        }

        // test verify
        // The same embedding is checked before and after removing the speaker,
        // so it is computed only once.
        float[] fangjunTestEmbedding = ComputeEmbedding(extractor, testFiles[0]);

        if (!manager.Verify("fangjun", fangjunTestEmbedding, threshold))
        {
            Fail("testFiles[0] should match fangjun!");
            return;
        }

        if (!manager.Remove("fangjun"))
        {
            Fail("Failed to remove fangjun");
            return;
        }

        // After removal the same recording must no longer verify as fangjun.
        if (manager.Verify("fangjun", fangjunTestEmbedding, threshold))
        {
            Fail($"{testFiles[0]} should match no one!");
            return;
        }

        if (manager.NumSpeakers != 1)
        {
            Fail("There should only 1 speaker left.");
            return;
        }
    }
}
|
||||
1
dotnet-examples/speaker-identification/WaveReader.cs
Symbolic link
1
dotnet-examples/speaker-identification/WaveReader.cs
Symbolic link
@@ -0,0 +1 @@
|
||||
../offline-decode-files/WaveReader.cs
|
||||
13
dotnet-examples/speaker-identification/run.sh
Executable file
13
dotnet-examples/speaker-identification/run.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
#
# Fetches the speaker-embedding model and the test recordings into the current
# directory if they are not already there, then runs the demo with `dotnet run`.

set -ex

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -e "./$model" ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page as
  # the model. The file is downloaded under a temporary name and renamed only
  # on success, so a failed or interrupted download is never mistaken for a
  # complete model by the existence check above on the next run.
  curl -fSL -o "./$model.tmp" "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
  mv "./$model.tmp" "./$model"
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run
|
||||
@@ -0,0 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Build settings for the speaker-identification console demo. -->
  <PropertyGroup>
    <!-- Build a runnable executable targeting .NET 6. -->
    <TargetFramework>net6.0</TargetFramework>
    <OutputType>Exe</OutputType>
    <RootNamespace>speaker_identification</RootNamespace>
    <!-- Language features: nullable reference types and implicit global usings. -->
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- sherpa-onnx C# bindings. "*" is a floating version, so no specific release is pinned. -->
  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
|
||||
Reference in New Issue
Block a user