C# API for speaker diarization (#1407)

This commit is contained in:
Fangjun Kuang
2024-10-10 14:29:05 +08:00
committed by GitHub
parent bd50e79590
commit a45e5dba99
12 changed files with 408 additions and 51 deletions

View File

@@ -0,0 +1,20 @@
/// Copyright (c) 2024 Xiaomi Corporation
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Clustering settings, laid out sequentially so the struct can be handed
/// to unmanaged code as-is.
/// </summary>
/// <remarks>
/// <c>default(FastClusteringConfig)</c> bypasses the defaults below and
/// yields all-zero fields; use <c>new FastClusteringConfig()</c> instead.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct FastClusteringConfig
{
    // The declaration order of the fields is the marshalled layout;
    // do not reorder them.

    /// Defaults to -1.
    /// NOTE(review): presumably a non-positive value means "number of
    /// clusters not fixed" - confirm against the native API.
    public int NumClusters = -1;

    /// Defaults to 0.5.
    public float Threshold = 0.5F;

    /// Creates a config populated with the default values declared on the
    /// fields above.
    public FastClusteringConfig()
    {
    }
}
}

View File

@@ -0,0 +1,122 @@
/// Copyright (c) 2024 Xiaomi Corporation
using System;
using System.Runtime.InteropServices;
using System.Text;
namespace SherpaOnnx
{
/// <summary>
/// Progress callback handed to the native library by
/// <c>OfflineSpeakerDiarization.ProcessWithCallback</c>.
/// </summary>
/// <param name="numProcessedChunks">Number of chunks processed so far.</param>
/// <param name="numTotalChunks">Total number of chunks to process.</param>
/// <param name="arg">
/// Opaque pointer. NOTE(review): presumably the same value that the caller
/// passed as <c>arg</c> to <c>ProcessWithCallback</c>, which forwards it to
/// the native call unchanged - confirm against the C API.
/// </param>
/// <returns>
/// An integer handed back to the native caller.
/// NOTE(review): its meaning is not visible from this file.
/// </returns>
// The native import that receives this delegate is declared Cdecl. Without
// this attribute the marshalled function pointer uses the platform default
// convention (StdCall on 32-bit Windows), which would mismatch the caller.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate int OfflineSpeakerDiarizationProgressCallback(int numProcessedChunks, int numTotalChunks, IntPtr arg);
/// <summary>
/// Managed wrapper that owns a native offline speaker diarization object
/// and releases it on <see cref="Dispose"/> or finalization.
/// </summary>
public class OfflineSpeakerDiarization : IDisposable
{
    /// <summary>
    /// Creates the native diarization object from <paramref name="config"/>.
    /// </summary>
    /// <param name="config">Configuration passed by reference to the native constructor.</param>
    /// <remarks>
    /// NOTE(review): the native pointer is stored without validation;
    /// presumably IntPtr.Zero signals a creation failure - confirm against
    /// the C API before relying on the instance.
    /// </remarks>
    public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflineSpeakerDiarization(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>
    /// Runs diarization over <paramref name="samples"/>.
    /// </summary>
    /// <param name="samples">Input samples; must not be null.</param>
    /// <returns>
    /// The segments copied out of the native result, in the order produced
    /// by the native sort-by-start-time call; an empty array when the native
    /// call returns a null result.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public OfflineSpeakerDiarizationSegment[] Process(float[] samples)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcess(_handle.Handle, samples, samples.Length);

        // Only the raw IntPtr is marshalled, so HandleRef offers no
        // protection here: keep this wrapper reachable until the native
        // call has returned so the finalizer cannot destroy the handle
        // while it is in use.
        GC.KeepAlive(this);
        return ProcessImpl(result);
    }

    /// <summary>
    /// Same as <see cref="Process"/>, additionally passing
    /// <paramref name="callback"/> and <paramref name="arg"/> to the native
    /// call.
    /// </summary>
    /// <param name="samples">Input samples; must not be null.</param>
    /// <param name="callback">Progress callback forwarded to the native library.</param>
    /// <param name="arg">Opaque pointer forwarded to the native library unchanged.</param>
    /// <returns>See <see cref="Process"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public OfflineSpeakerDiarizationSegment[] ProcessWithCallback(float[] samples, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(_handle.Handle, samples, samples.Length, callback, arg);

        // See Process(): prevents finalization during the native call.
        GC.KeepAlive(this);
        return ProcessImpl(result);
    }

    /// <summary>
    /// Copies the segments out of a native result and then frees both the
    /// sorted segment buffer and the result itself.
    /// </summary>
    /// <param name="result">Native result pointer; IntPtr.Zero yields an empty array.</param>
    private OfflineSpeakerDiarizationSegment[] ProcessImpl(IntPtr result)
    {
        if (result == IntPtr.Zero)
        {
            return new OfflineSpeakerDiarizationSegment[] {};
        }

        IntPtr p = IntPtr.Zero;
        try
        {
            int numSegments = SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
            p = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);

            OfflineSpeakerDiarizationSegment[] ans = new OfflineSpeakerDiarizationSegment[numSegments];
            unsafe
            {
                // One native record is two floats followed by one int,
                // matching the struct read by OfflineSpeakerDiarizationSegment.
                int size = sizeof(float) * 2 + sizeof(int);

                // Raw pointer arithmetic is used instead of IntPtr.Add(),
                // which is not available when targeting net20.
                byte* first = (byte*)p;
                for (int i = 0; i != numSegments; ++i)
                {
                    IntPtr t = new IntPtr(first + (long)i * size);
                    ans[i] = new OfflineSpeakerDiarizationSegment(t);
                }
            }

            return ans;
        }
        finally
        {
            // Release the native buffers even when copying a segment throws,
            // so a failure does not leak unmanaged memory.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p);
            }

            SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
        }
    }

    /// <summary>
    /// Releases the native object. Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflineSpeakerDiarization()
    {
        Cleanup();
    }

    /// <summary>
    /// Destroys the native object if one is still held.
    /// </summary>
    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            // Already released, or the constructor never obtained a handle.
            // Skipping the native call keeps repeated Dispose() calls and
            // the finalizer of a half-constructed instance from invoking
            // the native destroy function with a null pointer.
            return;
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
        SherpaOnnxDestroyOfflineSpeakerDiarization(h);
    }

    private HandleRef _handle;

    /// <summary>
    /// Sample rate reported by the native diarization object.
    /// </summary>
    public int SampleRate
    {
        get
        {
            int rate = SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(_handle.Handle);

            // See Process(): prevents finalization during the native call.
            GC.KeepAlive(this);
            return rate;
        }
    }

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflineSpeakerDiarization(ref OfflineSpeakerDiarizationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflineSpeakerDiarization(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcess(IntPtr handle, float[] samples, int n);

    [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(IntPtr handle, float[] samples, int n, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(IntPtr handle);
}
}

View File

@@ -0,0 +1,31 @@
/// Copyright (c) 2024 Xiaomi Corporation
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Top-level configuration for offline speaker diarization, laid out
/// sequentially so it can be passed by reference to unmanaged code.
/// </summary>
/// <remarks>
/// <c>default(OfflineSpeakerDiarizationConfig)</c> bypasses the defaults
/// below; construct it with <c>new</c> to get them.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerDiarizationConfig
{
    // The declaration order of the fields is the marshalled layout;
    // do not reorder them.

    /// Segmentation model settings; starts out default-constructed.
    public OfflineSpeakerSegmentationModelConfig Segmentation = new OfflineSpeakerSegmentationModelConfig();

    /// Speaker embedding extractor settings; starts out default-constructed.
    public SpeakerEmbeddingExtractorConfig Embedding = new SpeakerEmbeddingExtractorConfig();

    /// Clustering settings; starts out default-constructed.
    public FastClusteringConfig Clustering = new FastClusteringConfig();

    /// Defaults to 0.3.
    /// NOTE(review): presumably a duration in seconds - confirm.
    public float MinDurationOn = 0.3F;

    /// Defaults to 0.5.
    /// NOTE(review): presumably a duration in seconds - confirm.
    public float MinDurationOff = 0.5F;

    /// Creates a config populated with the default values declared on the
    /// fields above.
    public OfflineSpeakerDiarizationConfig()
    {
    }
}
}

View File

@@ -0,0 +1,33 @@
/// Copyright (c) 2024 Xiaomi Corporation
using System;
using System.Runtime.InteropServices;
using System.Text;
namespace SherpaOnnx
{
/// <summary>
/// Managed copy of one diarization segment read from unmanaged memory.
/// </summary>
public class OfflineSpeakerDiarizationSegment
{
    /// Start of the segment. NOTE(review): presumably in seconds - confirm.
    public float Start;

    /// End of the segment. NOTE(review): presumably in seconds - confirm.
    public float End;

    /// Integer speaker label of the segment.
    public int Speaker;

    /// <summary>
    /// Reads one native segment record at <paramref name="handle"/> and
    /// copies its values into this instance. The native memory is not
    /// retained or freed here.
    /// </summary>
    /// <param name="handle">Address of the native record to copy.</param>
    public OfflineSpeakerDiarizationSegment(IntPtr handle)
    {
        object boxed = Marshal.PtrToStructure(handle, typeof(Impl));
        Impl native = (Impl)boxed;

        Start = native.Start;
        End = native.End;
        Speaker = native.Speaker;
    }

    // Mirror of the native record: two floats followed by an int, in this
    // exact order.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public float Start;
        public float End;
        public int Speaker;
    }
}
}

View File

@@ -0,0 +1,32 @@
/// Copyright (c) 2024 Xiaomi Corporation
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Settings for the speaker segmentation model, laid out sequentially for
/// marshalling to unmanaged code.
/// </summary>
/// <remarks>
/// <c>default(OfflineSpeakerSegmentationModelConfig)</c> bypasses the
/// defaults below; construct it with <c>new</c> to get them.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerSegmentationModelConfig
{
    // The declaration order of the fields is the marshalled layout;
    // do not reorder them.

    /// Pyannote model settings; starts out default-constructed.
    public OfflineSpeakerSegmentationPyannoteModelConfig Pyannote = new OfflineSpeakerSegmentationPyannoteModelConfig();

    /// Number of threads used to run the neural network model. Defaults to 1.
    public int NumThreads = 1;

    /// Integer flag controlling whether debug information of the model is
    /// printed. Defaults to 0.
    /// NOTE(review): presumably any non-zero value enables it - confirm.
    public int Debug = 0;

    /// Execution provider name, marshalled as an ANSI C string.
    /// Defaults to "cpu".
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider = "cpu";

    /// Creates a config populated with the default values declared on the
    /// fields above.
    public OfflineSpeakerSegmentationModelConfig()
    {
    }
}
}

View File

@@ -0,0 +1,20 @@
/// Copyright (c) 2024 Xiaomi Corporation
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Settings for a pyannote segmentation model, laid out sequentially for
/// marshalling to unmanaged code.
/// </summary>
/// <remarks>
/// <c>default(OfflineSpeakerSegmentationPyannoteModelConfig)</c> leaves
/// <see cref="Model"/> null; construct it with <c>new</c> to get the empty
/// string default.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeakerSegmentationPyannoteModelConfig
{
    /// Model string, marshalled as an ANSI C string. Defaults to "".
    /// NOTE(review): presumably a path to the model file - confirm.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model = "";

    /// Creates a config populated with the default value declared on the
    /// field above.
    public OfflineSpeakerSegmentationPyannoteModelConfig()
    {
    }
}
}