This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/dotnet-examples/speaker-identification/Program.cs
2024-05-11 13:27:33 +08:00

156 lines
4.0 KiB
C#

// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do speaker identification with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
//
// 2. Download test data from
//
// git clone https://github.com/csukuangfj/sr-data
//
// 3. Now run it
//
// dotnet run
using SherpaOnnx;
using System.Collections.Generic;
using System;
/// <summary>
/// Demo of speaker identification with sherpa-onnx: enrolls two speakers from
/// wave files, searches and verifies test utterances against them, and then
/// removes one speaker again.
/// </summary>
class SpeakerIdentificationDemo
{
    /// <summary>
    /// Computes the speaker embedding of a single wave file.
    /// </summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the wave file to read.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c>.</returns>
    public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
    {
        WaveReader reader = new WaveReader(filename);

        // The stream is only needed until the embedding has been computed, so
        // release it deterministically instead of waiting for the finalizer.
        using (OnlineStream stream = extractor.CreateStream())
        {
            stream.AcceptWaveform(reader.SampleRate, reader.Samples);
            stream.InputFinished();
            return extractor.Compute(stream);
        }
    }

    /// <summary>
    /// Computes one embedding per file, in the same order as <paramref name="filenames"/>.
    /// </summary>
    private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] filenames)
    {
        float[][] embeddings = new float[filenames.Length][];
        for (int i = 0; i < filenames.Length; ++i)
        {
            embeddings[i] = ComputeEmbedding(extractor, filenames[i]);
        }
        return embeddings;
    }

    /// <summary>
    /// Prints <paramref name="message"/> and returns <c>false</c> so that a failed
    /// check can be reported with a single <c>return Fail(...)</c>.
    /// </summary>
    private static bool Fail(string message)
    {
        Console.WriteLine(message);
        return false;
    }

    /// <summary>
    /// Runs the enroll / search / verify / remove sequence.
    /// </summary>
    /// <returns><c>true</c> when every check passed; <c>false</c> after the first failed check.</returns>
    private static bool Run(SpeakerEmbeddingExtractor extractor, SpeakerEmbeddingManager manager)
    {
        string[] spk1Files =
            new string[] {
                "./sr-data/enroll/fangjun-sr-1.wav",
                "./sr-data/enroll/fangjun-sr-2.wav",
                "./sr-data/enroll/fangjun-sr-3.wav",
            };
        string[] spk2Files =
            new string[] {
                "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
            };

        float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);
        float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

        if (!manager.Add("fangjun", spk1Vec))
        {
            return Fail("Failed to register fangjun");
        }

        if (!manager.Add("leijun", spk2Vec))
        {
            return Fail("Failed to register leijun");
        }

        if (manager.NumSpeakers != 2)
        {
            return Fail("There should be two speakers");
        }

        if (!manager.Contains("fangjun"))
        {
            return Fail("It should contain the speaker fangjun");
        }

        if (!manager.Contains("leijun"))
        {
            return Fail("It should contain the speaker leijun");
        }

        Console.WriteLine("---All speakers---");
        foreach (var s in manager.GetAllSpeakers())
        {
            Console.WriteLine(s);
        }
        Console.WriteLine("------------");

        string[] testFiles =
            new string[] {
                "./sr-data/test/fangjun-test-sr-1.wav",
                "./sr-data/test/leijun-test-sr-1.wav",
                "./sr-data/test/liudehua-test-sr-1.wav"
            };

        // Threshold handed to both Search and Verify below.
        float threshold = 0.6f;

        // Keep the embeddings so the verification checks below can reuse the
        // first one instead of decoding and processing the same file again.
        float[][] testVec = new float[testFiles.Length][];
        for (int i = 0; i < testFiles.Length; ++i)
        {
            testVec[i] = ComputeEmbedding(extractor, testFiles[i]);

            String name = manager.Search(testVec[i], threshold);
            if (string.IsNullOrEmpty(name))
            {
                name = "<Unknown>";
            }
            Console.WriteLine("{0}: {1}", testFiles[i], name);
        }

        // test verify
        if (!manager.Verify("fangjun", testVec[0], threshold))
        {
            return Fail(string.Format("{0} should match fangjun!", testFiles[0]));
        }

        if (!manager.Remove("fangjun"))
        {
            return Fail("Failed to remove fangjun");
        }

        // After removal the same utterance must no longer verify as fangjun.
        if (manager.Verify("fangjun", testVec[0], threshold))
        {
            return Fail(string.Format("{0} should match no one!", testFiles[0]));
        }

        if (manager.NumSpeakers != 1)
        {
            return Fail("There should only 1 speaker left.");
        }

        return true;
    }

    /// <summary>
    /// Entry point: builds the extractor and the speaker manager, runs the demo
    /// and sets a non-zero process exit code when any check fails.
    /// </summary>
    static void Main(string[] args)
    {
        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
        config.Debug = 1;

        // Dispose both objects on every exit path, including exceptions.
        using (var extractor = new SpeakerEmbeddingExtractor(config))
        using (var manager = new SpeakerEmbeddingManager(extractor.Dim))
        {
            if (!Run(extractor, manager))
            {
                // Main returns void, so report the failure through the exit code.
                Environment.ExitCode = 1;
            }
        }
    }
}