C# API for speaker diarization (#1407)
This commit is contained in:
20
scripts/dotnet/FastClusteringConfig.cs
Normal file
20
scripts/dotnet/FastClusteringConfig.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
/// Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// <summary>
/// Clustering options, laid out sequentially so the struct can be handed to
/// native code as-is.
/// </summary>
/// <remarks>
/// The parameterless constructor installs the defaults below. Note that
/// <c>default(FastClusteringConfig)</c> bypasses the constructor and yields
/// all-zero fields.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct FastClusteringConfig
{
    // Field order is part of the native layout; do not reorder.

    /// Number of clusters; the constructor sets it to -1.
    /// NOTE(review): presumably a non-positive value means "unknown, use
    /// Threshold instead" - confirm against the native API.
    public int NumClusters;

    /// Clustering threshold; the constructor sets it to 0.5.
    public float Threshold;

    /// Creates a config populated with the default values.
    public FastClusteringConfig()
    {
        this.Threshold = 0.5F;
        this.NumClusters = -1;
    }
}
|
||||
}
|
||||
122
scripts/dotnet/OfflineSpeakerDiarization.cs
Normal file
122
scripts/dotnet/OfflineSpeakerDiarization.cs
Normal file
@@ -0,0 +1,122 @@
|
||||
/// Copyright (c) 2024 Xiaomi Corporation
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
/// <summary>
/// Progress callback handed to native code by
/// OfflineSpeakerDiarization.ProcessWithCallback.
/// </summary>
/// <param name="numProcessedChunks">Number of chunks processed so far.</param>
/// <param name="numTotalChunks">Total number of chunks to process.</param>
/// <param name="arg">
/// NOTE(review): appears to be the opaque pointer the caller supplied to
/// ProcessWithCallback, passed back unchanged. The previous comment described
/// it as a `const float*`, which does not match this signature - confirm
/// against the C API header.
/// </param>
/// <returns>
/// An int consumed by native code.
/// NOTE(review): its meaning is not visible from this file - confirm.
/// </returns>
// The P/Invoke that receives this delegate is declared Cdecl, so the
// callback must use the same convention. Without this attribute the
// delegate is marshalled with the platform default (StdCall on 32-bit
// Windows), which mismatches a cdecl caller.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate int OfflineSpeakerDiarizationProgressCallback(int numProcessedChunks, int numTotalChunks, IntPtr arg);
|
||||
|
||||
/// <summary>
/// Managed wrapper around the native object created by
/// SherpaOnnxCreateOfflineSpeakerDiarization. The native object is released
/// by <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public class OfflineSpeakerDiarization : IDisposable
{
    // Byte stride between consecutive native segments: two floats followed by
    // one int, matching the struct read by OfflineSpeakerDiarizationSegment.
    private const int SegmentSizeInBytes = sizeof(float) * 2 + sizeof(int);

    /// <summary>
    /// Creates the native diarization object from <paramref name="config"/>.
    /// </summary>
    /// <remarks>
    /// The returned native pointer is stored without validation; if it is
    /// null, the processing members throw InvalidOperationException.
    /// </remarks>
    public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflineSpeakerDiarization(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>
    /// Runs diarization over <paramref name="samples"/>.
    /// </summary>
    /// <param name="samples">
    /// Audio samples. NOTE(review): presumably mono audio at
    /// <see cref="SampleRate"/> - confirm against the native API.
    /// </param>
    /// <returns>
    /// Segments in the order produced by the native sort-by-start-time call;
    /// an empty array when the native call returns no result.
    /// </returns>
    /// <exception cref="ArgumentNullException">samples is null.</exception>
    /// <exception cref="InvalidOperationException">The native handle is null.</exception>
    public OfflineSpeakerDiarizationSegment[] Process(float[] samples)
    {
        if (samples == null)
        {
            throw new ArgumentNullException("samples");
        }

        IntPtr handle = GetValidHandle();
        IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcess(handle, samples, samples.Length);

        // Only the raw IntPtr is passed to native code, so nothing else keeps
        // this wrapper (and therefore the native object) from being finalized
        // while the native call is still running.
        GC.KeepAlive(this);

        return ProcessImpl(result);
    }

    /// <summary>
    /// Same as <see cref="Process"/>, additionally forwarding
    /// <paramref name="callback"/> and <paramref name="arg"/> to native code.
    /// </summary>
    /// <param name="samples">Audio samples; see <see cref="Process"/>.</param>
    /// <param name="callback">Progress callback passed to the native call.</param>
    /// <param name="arg">Pointer passed to the native call alongside the callback.</param>
    /// <exception cref="ArgumentNullException">samples is null.</exception>
    /// <exception cref="InvalidOperationException">The native handle is null.</exception>
    public OfflineSpeakerDiarizationSegment[] ProcessWithCallback(float[] samples, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg)
    {
        if (samples == null)
        {
            throw new ArgumentNullException("samples");
        }

        IntPtr handle = GetValidHandle();
        IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(handle, samples, samples.Length, callback, arg);

        // See Process(): prevents finalization during the native call.
        GC.KeepAlive(this);

        return ProcessImpl(result);
    }

    // Copies the segments of a native result into managed objects and frees
    // the native result (and the sorted segment buffer) on every path.
    private static OfflineSpeakerDiarizationSegment[] ProcessImpl(IntPtr result)
    {
        if (result == IntPtr.Zero)
        {
            return new OfflineSpeakerDiarizationSegment[0];
        }

        IntPtr p = IntPtr.Zero;
        try
        {
            int numSegments = SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
            if (numSegments <= 0)
            {
                return new OfflineSpeakerDiarizationSegment[0];
            }

            p = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);
            if (p == IntPtr.Zero)
            {
                return new OfflineSpeakerDiarizationSegment[0];
            }

            OfflineSpeakerDiarizationSegment[] ans = new OfflineSpeakerDiarizationSegment[numSegments];

            // IntPtr.Add() is not available on net20, so compute the element
            // addresses through ToInt64(); this also avoids an unsafe block.
            long baseAddress = p.ToInt64();
            for (int i = 0; i != numSegments; ++i)
            {
                IntPtr t = new IntPtr(baseAddress + (long)i * SegmentSizeInBytes);
                ans[i] = new OfflineSpeakerDiarizationSegment(t);
            }

            return ans;
        }
        finally
        {
            // Release native memory even if marshalling a segment threw.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p);
            }

            SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
        }
    }

    /// <summary>
    /// Releases the native object. Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflineSpeakerDiarization()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            // Nothing to release: either already cleaned up, or the
            // constructor never obtained a handle (e.g. it threw). Skipping
            // the native call keeps the finalizer from calling into native
            // code in that situation.
            return;
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);

        SherpaOnnxDestroyOfflineSpeakerDiarization(h);
    }

    // Returns the native handle, or throws when there is none to use.
    private IntPtr GetValidHandle()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            throw new InvalidOperationException(
                "The native OfflineSpeakerDiarization handle is null: creation failed or the object has been disposed.");
        }

        return h;
    }

    private HandleRef _handle;

    /// <summary>
    /// Value reported by SherpaOnnxOfflineSpeakerDiarizationGetSampleRate for
    /// this object.
    /// </summary>
    /// <exception cref="InvalidOperationException">The native handle is null.</exception>
    public int SampleRate
    {
        get
        {
            int rate = SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(GetValidHandle());
            GC.KeepAlive(this);
            return rate;
        }
    }

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflineSpeakerDiarization(ref OfflineSpeakerDiarizationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflineSpeakerDiarization(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcess(IntPtr handle, float[] samples, int n);

    // NOTE(review): this is the only import that states its calling
    // convention explicitly; the others rely on the platform default.
    // Confirm whether they should all be Cdecl.
    [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(IntPtr handle, float[] samples, int n, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(IntPtr handle);
}
|
||||
}
|
||||
|
||||
31
scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
Normal file
31
scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
Normal file
@@ -0,0 +1,31 @@
|
||||
/// Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// <summary>
/// Top-level configuration passed by reference to the native
/// speaker-diarization constructor. Laid out sequentially for interop.
/// </summary>
/// <remarks>
/// The parameterless constructor fills every nested config with its own
/// defaults. <c>default(OfflineSpeakerDiarizationConfig)</c> bypasses it.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerDiarizationConfig
{
    // Field order is part of the native layout; do not reorder.

    /// Speaker segmentation model settings.
    public OfflineSpeakerSegmentationModelConfig Segmentation;

    /// Speaker embedding extractor settings.
    public SpeakerEmbeddingExtractorConfig Embedding;

    /// Clustering settings.
    public FastClusteringConfig Clustering;

    /// Defaults to 0.3.
    /// NOTE(review): presumably a minimum duration in seconds - confirm.
    public float MinDurationOn;

    /// Defaults to 0.5.
    /// NOTE(review): presumably a minimum duration in seconds - confirm.
    public float MinDurationOff;

    /// Creates a config whose members all carry their default values.
    public OfflineSpeakerDiarizationConfig()
    {
        this.MinDurationOn = 0.3F;
        this.MinDurationOff = 0.5F;

        this.Segmentation = new OfflineSpeakerSegmentationModelConfig();
        this.Embedding = new SpeakerEmbeddingExtractorConfig();
        this.Clustering = new FastClusteringConfig();
    }
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
33
scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
Normal file
33
scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
Normal file
@@ -0,0 +1,33 @@
|
||||
/// Copyright (c) 2024 Xiaomi Corporation
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// <summary>
/// Managed copy of one native diarization segment.
/// </summary>
public class OfflineSpeakerDiarizationSegment
{
    // Mirror of the native record: two floats followed by one int.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public float Start;
        public float End;
        public int Speaker;
    }

    /// Segment start. NOTE(review): presumably in seconds - confirm.
    public float Start;

    /// Segment end. NOTE(review): presumably in seconds - confirm.
    public float End;

    /// Speaker value as reported by native code.
    public int Speaker;

    /// <summary>
    /// Copies the native record located at <paramref name="handle"/> into
    /// this object. The pointer is only read during construction.
    /// </summary>
    /// <param name="handle">Address of a native segment record.</param>
    public OfflineSpeakerDiarizationSegment(IntPtr handle)
    {
        object boxed = Marshal.PtrToStructure(handle, typeof(Impl));
        Impl native = (Impl)boxed;

        this.Start = native.Start;
        this.End = native.End;
        this.Speaker = native.Speaker;
    }
}
|
||||
}
|
||||
|
||||
32
scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
Normal file
32
scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
Normal file
@@ -0,0 +1,32 @@
|
||||
/// Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// <summary>
/// Settings for the speaker segmentation model. Laid out sequentially for
/// interop.
/// </summary>
/// <remarks>
/// The parameterless constructor installs the defaults described on each
/// field. <c>default(OfflineSpeakerSegmentationModelConfig)</c> bypasses it.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerSegmentationModelConfig
{
    // Field order is part of the native layout; do not reorder.

    /// Pyannote model settings.
    public OfflineSpeakerSegmentationPyannoteModelConfig Pyannote;

    /// Number of threads used to run the neural network model. Defaults to 1.
    public int NumThreads;

    /// Set to true (non-zero) to print debug information of the model.
    /// Defaults to 0.
    public int Debug;

    /// Execution provider name, marshalled as an ANSI string.
    /// Defaults to "cpu".
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;

    /// Creates a config populated with the default values.
    public OfflineSpeakerSegmentationModelConfig()
    {
        this.Provider = "cpu";
        this.Debug = 0;
        this.NumThreads = 1;
        this.Pyannote = new OfflineSpeakerSegmentationPyannoteModelConfig();
    }
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
/// Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// <summary>
/// Settings for a pyannote segmentation model. Laid out sequentially for
/// interop.
/// </summary>
/// <remarks>
/// <c>default(OfflineSpeakerSegmentationPyannoteModelConfig)</c> bypasses the
/// constructor and leaves <see cref="Model"/> null rather than empty.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerSegmentationPyannoteModelConfig
{
    /// Model string, marshalled as an ANSI string. Defaults to "".
    /// NOTE(review): presumably the path to the model file - confirm.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;

    /// Creates a config with an empty model string.
    public OfflineSpeakerSegmentationPyannoteModelConfig()
    {
        this.Model = "";
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user