C# API for speaker diarization (#1407)
This commit is contained in:
8
.github/scripts/test-dot-net.sh
vendored
8
.github/scripts/test-dot-net.sh
vendored
@@ -2,7 +2,13 @@
|
|||||||
|
|
||||||
cd dotnet-examples/
|
cd dotnet-examples/
|
||||||
|
|
||||||
cd ./offline-decode-files
|
cd ./offline-speaker-diarization
|
||||||
|
./run.sh
|
||||||
|
rm -rfv *.onnx
|
||||||
|
rm -fv *.wav
|
||||||
|
rm -rfv sherpa-onnx-pyannote-*
|
||||||
|
|
||||||
|
cd ../offline-decode-files
|
||||||
./run-sense-voice-ctc.sh
|
./run-sense-voice-ctc.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
|
|
||||||
|
|||||||
71
.github/workflows/test-dot-net.yaml
vendored
71
.github/workflows/test-dot-net.yaml
vendored
@@ -47,53 +47,10 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Free space
|
|
||||||
if: matrix.os == 'ubuntu-latest'
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
df -h
|
|
||||||
rm -rf /opt/hostedtoolcache
|
|
||||||
df -h
|
|
||||||
|
|
||||||
- name: Free more space
|
|
||||||
if: matrix.os == 'ubuntu-latest'
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
# https://github.com/orgs/community/discussions/25678
|
|
||||||
cd /opt
|
|
||||||
find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
|
|
||||||
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo rm -rf "/usr/local/share/boost"
|
|
||||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
|
||||||
|
|
||||||
- name: Free Disk Space (Ubuntu)
|
|
||||||
if: matrix.os == 'ubuntu-latest'
|
|
||||||
uses: jlumbroso/free-disk-space@main
|
|
||||||
with:
|
|
||||||
# this might remove tools that are actually needed,
|
|
||||||
# if set to "true" but frees about 6 GB
|
|
||||||
tool-cache: false
|
|
||||||
|
|
||||||
# all of these default to true, but feel free to set to
|
|
||||||
# "false" if necessary for your workflow
|
|
||||||
android: true
|
|
||||||
dotnet: false
|
|
||||||
haskell: true
|
|
||||||
large-packages: true
|
|
||||||
docker-images: false
|
|
||||||
swap-storage: true
|
|
||||||
|
|
||||||
- name: Check space
|
|
||||||
if: matrix.os == 'ubuntu-latest'
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
df -h
|
|
||||||
|
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: hendrikmuhs/ccache-action@v1.2
|
uses: hendrikmuhs/ccache-action@v1.2
|
||||||
with:
|
with:
|
||||||
key: ${{ matrix.os }}-release-shared
|
key: ${{ matrix.os }}-dotnet-release-shared
|
||||||
|
|
||||||
- name: Build sherpa-onnx
|
- name: Build sherpa-onnx
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -110,11 +67,16 @@ jobs:
|
|||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
|
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
|
||||||
-DBUILD_ESPEAK_NG_EXE=OFF \
|
-DBUILD_ESPEAK_NG_EXE=OFF \
|
||||||
-DSHERPA_ONNX_ENABLE_BINARY=ON \
|
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
|
||||||
..
|
..
|
||||||
|
|
||||||
cmake --build . --target install --config Release
|
cmake --build . --target install --config Release
|
||||||
|
|
||||||
|
rm -rf install/share
|
||||||
|
rm -rf install/lib/pkg*
|
||||||
|
|
||||||
|
ls -lh ./install/lib
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.os }}
|
name: ${{ matrix.os }}
|
||||||
@@ -148,7 +110,7 @@ jobs:
|
|||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ubuntu-latest
|
name: ubuntu-latest
|
||||||
path: /tmp/linux
|
path: /tmp/linux-x64
|
||||||
|
|
||||||
- name: Setup .NET
|
- name: Setup .NET
|
||||||
uses: actions/setup-dotnet@v4
|
uses: actions/setup-dotnet@v4
|
||||||
@@ -162,17 +124,21 @@ jobs:
|
|||||||
- name: Display files
|
- name: Display files
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
echo "----------/tmp/----------"
|
echo "----------/tmp----------"
|
||||||
ls -lh /tmp/
|
ls -lh /tmp
|
||||||
|
|
||||||
echo "----------/tmp/linux----------"
|
echo "----------/tmp/linux-x64----------"
|
||||||
ls -lh /tmp/linux
|
ls -lh /tmp/linux-x64
|
||||||
|
df -h
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd scripts/dotnet
|
cd scripts/dotnet
|
||||||
./run.sh
|
./run.sh
|
||||||
|
df -h
|
||||||
|
|
||||||
|
ls -lh /tmp/packages
|
||||||
|
|
||||||
- name: Copy files
|
- name: Copy files
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -181,9 +147,14 @@ jobs:
|
|||||||
|
|
||||||
ls -lh /tmp
|
ls -lh /tmp
|
||||||
|
|
||||||
|
df -h
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
dotnet nuget locals all --clear
|
||||||
|
df -h
|
||||||
|
|
||||||
.github/scripts/test-dot-net.sh
|
.github/scripts/test-dot-net.sh
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
|
|||||||
83
dotnet-examples/offline-speaker-diarization/Program.cs
Normal file
83
dotnet-examples/offline-speaker-diarization/Program.cs
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
|
||||||
|
// This file shows how to use sherpa-onnx C# API for speaker diarization
|
||||||
|
/*
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
Step 1: Download a speaker segmentation model
|
||||||
|
|
||||||
|
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
|
||||||
|
for a list of available models. The following is an example
|
||||||
|
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||||
|
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||||
|
|
||||||
|
Step 2: Download a speaker embedding extractor model
|
||||||
|
|
||||||
|
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
|
||||||
|
for a list of available models. The following is an example
|
||||||
|
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||||
|
|
||||||
|
Step 3. Download test wave files
|
||||||
|
|
||||||
|
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
|
||||||
|
for a list of available test wave files. The following is an example
|
||||||
|
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
|
||||||
|
|
||||||
|
Step 4. Run it
|
||||||
|
|
||||||
|
dotnet run
|
||||||
|
*/
|
||||||
|
|
||||||
|
using SherpaOnnx;
|
||||||
|
using System;
|
||||||
|
|
||||||
|
// Demo program: runs offline speaker diarization on a test wave file and
// prints one line per detected segment.
class OfflineSpeakerDiarizationDemo
{
    static void Main(string[] args)
    {
        var config = new OfflineSpeakerDiarizationConfig();
        config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
        config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

        // The test wave ./0-four-speakers-zh.wav has 4 speakers, so
        // we set NumClusters to 4.
        config.Clustering.NumClusters = 4;

        // If you don't know the number of speakers in the test wave file, please
        // use
        // config.Clustering.Threshold = 0.5F; // You need to tune this threshold

        // OfflineSpeakerDiarization implements IDisposable, so release it
        // deterministically on every exit path, including the early
        // return below.
        using var sd = new OfflineSpeakerDiarization(config);

        var testWaveFile = "./0-four-speakers-zh.wav";
        WaveReader waveReader = new WaveReader(testWaveFile);
        if (sd.SampleRate != waveReader.SampleRate)
        {
            Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
            return;
        }

        Console.WriteLine("Started");

        // var segments = sd.Process(waveReader.Samples); // this one is also ok

        // Prints the percentage of processed chunks each time it is invoked.
        OfflineSpeakerDiarizationProgressCallback callback = (numProcessedChunks, numTotalChunks, arg) =>
        {
            float progress = 100.0F * numProcessedChunks / numTotalChunks;
            Console.WriteLine($"Progress {progress:0.00}%");
            return 0;
        };

        var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);

        foreach (var s in segments)
        {
            Console.WriteLine($"{s.Start:0.00} -- {s.End:0.00} speaker_{s.Speaker}");
        }
    }
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<RootNamespace>offline_speaker_diarization</RootNamespace>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Common\Common.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
18
dotnet-examples/offline-speaker-diarization/run.sh
Executable file
18
dotnet-examples/offline-speaker-diarization/run.sh
Executable file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Downloads the segmentation model, the speaker embedding model and the test
# wave file (each only if it is not already present), then runs the example.

# Stop at the first failing command so that a failed download or extraction
# does not fall through to `dotnet run` with missing files.
set -e

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dotnet run
|
||||||
@@ -31,6 +31,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-micro
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -93,6 +95,10 @@ Global
|
|||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
|
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|||||||
20
scripts/dotnet/FastClusteringConfig.cs
Normal file
20
scripts/dotnet/FastClusteringConfig.cs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
/// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
|
||||||
|
/// <summary>
/// Clustering options used by offline speaker diarization.
/// The struct uses sequential layout, so the order of the fields below
/// must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct FastClusteringConfig
{
    /// Number of clusters (speakers) to produce. Defaults to -1.
    /// NOTE(review): -1 presumably means "unknown, use Threshold instead" - confirm.
    public int NumClusters;

    /// Clustering threshold. Defaults to 0.5.
    /// NOTE(review): presumably only consulted when NumClusters is not set - confirm.
    public float Threshold;

    /// Initializes every field with its default value.
    public FastClusteringConfig()
    {
        Threshold = 0.5f;
        NumClusters = -1;
    }
}
|
||||||
|
}
|
||||||
122
scripts/dotnet/OfflineSpeakerDiarization.cs
Normal file
122
scripts/dotnet/OfflineSpeakerDiarization.cs
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
/// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
using System;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
/// <summary>
/// Progress callback that can be passed to
/// OfflineSpeakerDiarization.ProcessWithCallback.
/// </summary>
/// <param name="numProcessedChunks">Number of chunks processed so far.</param>
/// <param name="numTotalChunks">Total number of chunks to process.</param>
/// <param name="arg">
/// Opaque pointer-sized value; callers may pass IntPtr.Zero.
/// NOTE(review): presumably the same value that was given to
/// ProcessWithCallback, forwarded unchanged by the native side - confirm.
/// </param>
// The P/Invoke that receives this delegate is declared with
// CallingConvention.Cdecl, so the function pointer created from the delegate
// is given the same calling convention.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate int OfflineSpeakerDiarizationProgressCallback(int numProcessedChunks, int numTotalChunks, IntPtr arg);
|
||||||
|
|
||||||
|
/// <summary>
/// Managed wrapper around the native offline speaker diarization object.
/// The native object is created in the constructor and destroyed by
/// <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public class OfflineSpeakerDiarization : IDisposable
{
    /// <summary>Creates the native diarization object from <paramref name="config"/>.</summary>
    /// <param name="config">Model, embedding and clustering settings.</param>
    public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflineSpeakerDiarization(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Runs diarization on <paramref name="samples"/>.</summary>
    /// <param name="samples">Audio samples; must not be null.</param>
    /// <returns>
    /// The detected segments sorted by start time, or an empty array when the
    /// native call returns a null result.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public OfflineSpeakerDiarizationSegment[] Process(float[] samples)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcess(_handle.Handle, samples, samples.Length);
        return ProcessImpl(result);
    }

    /// <summary>
    /// Same as <see cref="Process"/>, but additionally hands
    /// <paramref name="callback"/> and <paramref name="arg"/> to the native call.
    /// </summary>
    /// <param name="samples">Audio samples; must not be null.</param>
    /// <param name="callback">Progress callback passed to the native library.</param>
    /// <param name="arg">Opaque value passed to the native library alongside the callback.</param>
    /// <returns>
    /// The detected segments sorted by start time, or an empty array when the
    /// native call returns a null result.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public OfflineSpeakerDiarizationSegment[] ProcessWithCallback(float[] samples, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(_handle.Handle, samples, samples.Length, callback, arg);
        return ProcessImpl(result);
    }

    // Copies the segments of a native result into managed objects and then
    // releases the native result and the native segment array.
    private OfflineSpeakerDiarizationSegment[] ProcessImpl(IntPtr result)
    {
        if (result == IntPtr.Zero)
        {
            return new OfflineSpeakerDiarizationSegment[] {};
        }

        IntPtr p = IntPtr.Zero;
        try
        {
            int numSegments = SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
            p = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);

            // Each native segment is read as two floats followed by one int.
            int size = sizeof(float) * 2 + sizeof(int);

            OfflineSpeakerDiarizationSegment[] ans = new OfflineSpeakerDiarizationSegment[numSegments];
            for (int i = 0; i != numSegments; ++i)
            {
                // IntPtr.Add() is not available on net20, so compute the
                // address of the i-th segment through ToInt64() instead.
                IntPtr t = new IntPtr(p.ToInt64() + (long)i * size);
                ans[i] = new OfflineSpeakerDiarizationSegment(t);
            }

            return ans;
        }
        finally
        {
            // Release the native buffers even if copying a segment throws.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p);
            }

            SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
        }
    }

    /// <summary>Destroys the native object. Calling it more than once is harmless.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflineSpeakerDiarization()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Nothing to release if the handle was never set or was already cleared.
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroyOfflineSpeakerDiarization(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    /// <summary>Sample rate reported by the native object.</summary>
    public int SampleRate
    {
        get
        {
            return SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(_handle.Handle);
        }
    }

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflineSpeakerDiarization(ref OfflineSpeakerDiarizationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflineSpeakerDiarization(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcess(IntPtr handle, float[] samples, int n);

    [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(IntPtr handle, float[] samples, int n, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(IntPtr handle);
}
|
||||||
|
}
|
||||||
|
|
||||||
31
scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
Normal file
31
scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
/// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
|
||||||
|
/// <summary>
/// Top-level configuration for offline speaker diarization.
/// The struct uses sequential layout, so the order of the fields below
/// must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerDiarizationConfig
{
    /// Configuration of the speaker segmentation model.
    public OfflineSpeakerSegmentationModelConfig Segmentation;

    /// Configuration of the speaker embedding extractor.
    public SpeakerEmbeddingExtractorConfig Embedding;

    /// Configuration of the clustering step.
    public FastClusteringConfig Clustering;

    /// Defaults to 0.3.
    /// NOTE(review): presumably a minimum segment duration in seconds - confirm.
    public float MinDurationOn;

    /// Defaults to 0.5.
    /// NOTE(review): presumably a minimum gap between segments in seconds - confirm.
    public float MinDurationOff;

    /// Initializes every nested config and both durations with their defaults.
    public OfflineSpeakerDiarizationConfig()
    {
        MinDurationOff = 0.5f;
        MinDurationOn = 0.3f;

        Clustering = new FastClusteringConfig();
        Embedding = new SpeakerEmbeddingExtractorConfig();
        Segmentation = new OfflineSpeakerSegmentationModelConfig();
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
33
scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
Normal file
33
scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
/// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
using System;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
|
||||||
|
/// <summary>
/// One diarization segment: a start value, an end value and a speaker index,
/// copied out of native memory when the object is constructed.
/// </summary>
public class OfflineSpeakerDiarizationSegment
{
    /// Start of the segment. NOTE(review): presumably in seconds - confirm.
    public float Start;

    /// End of the segment. NOTE(review): presumably in seconds - confirm.
    public float End;

    /// Index of the speaker assigned to the segment.
    public int Speaker;

    /// <summary>
    /// Reads one native segment (two floats followed by an int) located at
    /// <paramref name="handle"/> and copies its values into this object.
    /// </summary>
    /// <param name="handle">Address of the native segment to read.</param>
    public OfflineSpeakerDiarizationSegment(IntPtr handle)
    {
        object boxed = Marshal.PtrToStructure(handle, typeof(Impl));
        Impl native = (Impl)boxed;

        Speaker = native.Speaker;
        End = native.End;
        Start = native.Start;
    }

    // Mirror of the native segment layout; field order matters.
    [StructLayout(LayoutKind.Sequential)]
    private struct Impl
    {
        public float Start;
        public float End;
        public int Speaker;
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
32
scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
Normal file
32
scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
/// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
|
||||||
|
/// <summary>
/// Configuration of the speaker segmentation model.
/// The struct uses sequential layout, so the order of the fields below
/// must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerSegmentationModelConfig
{
    /// Configuration of the pyannote segmentation model.
    public OfflineSpeakerSegmentationPyannoteModelConfig Pyannote;

    /// Number of threads used to run the neural network model. Defaults to 1.
    public int NumThreads;

    /// 1 to print debug information of the model. Defaults to 0.
    public int Debug;

    /// Execution provider name. Defaults to "cpu".
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;

    /// Initializes every field with its default value.
    public OfflineSpeakerSegmentationModelConfig()
    {
        Provider = "cpu";
        Debug = 0;
        NumThreads = 1;
        Pyannote = new OfflineSpeakerSegmentationPyannoteModelConfig();
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
/// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
|
||||||
|
/// <summary>
/// Configuration of a pyannote speaker segmentation model.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerSegmentationPyannoteModelConfig
{
    /// Path to the model file. Defaults to the empty string.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;

    /// Initializes <see cref="Model"/> to the empty string.
    public OfflineSpeakerSegmentationPyannoteModelConfig()
    {
        Model = string.Empty;
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user