// Reconstructed from whitespace-collapsed patch text. Each "File:" marker names
// the scripts/dotnet source file that the type following it belongs to.
namespace SherpaOnnx
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;

    // File: scripts/dotnet/Dll.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Name of the native library referenced by the DllImport declarations.
    internal static class Dll
    {
        public const string Filename = "sherpa-onnx-c-api";
    }

    // File: scripts/dotnet/FeatureConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// It expects 16 kHz 16-bit single channel wave format.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct FeatureConfig
    {
        /// Sets the defaults: 16000 Hz sample rate and 80-dimensional features.
        public FeatureConfig()
        {
            SampleRate = 16000;
            FeatureDim = 80;
        }

        /// Sample rate of the input data. MUST match the one expected
        /// by the model. For instance, it should be 16000 for models provided
        /// by us.
        public int SampleRate;

        /// Feature dimension of the model.
        /// For instance, it should be 80 for models provided by us.
        public int FeatureDim;
    }

    // File: scripts/dotnet/OfflineLMConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Language-model settings embedded in OfflineRecognizerConfig.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineLMConfig
    {
        /// Sets the defaults: empty model path and a scale of 0.5.
        public OfflineLMConfig()
        {
            Model = "";
            Scale = 0.5F;
        }

        /// Marshaled as a NUL-terminated ANSI string (LPStr).
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        public float Scale;
    }

    // File: scripts/dotnet/OfflineModelConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Model settings embedded in OfflineRecognizerConfig. It nests one
    /// config struct per supported offline model family.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineModelConfig
    {
        /// Sets the defaults: default-constructed nested configs, empty
        /// Tokens and ModelType, one thread, Debug off (0), provider "cpu".
        public OfflineModelConfig()
        {
            Transducer = new OfflineTransducerModelConfig();
            Paraformer = new OfflineParaformerModelConfig();
            NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
            Whisper = new OfflineWhisperModelConfig();
            Tdnn = new OfflineTdnnModelConfig();
            Tokens = "";
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
            ModelType = "";
        }

        public OfflineTransducerModelConfig Transducer;
        public OfflineParaformerModelConfig Paraformer;
        public OfflineNemoEncDecCtcModelConfig NeMoCtc;
        public OfflineWhisperModelConfig Whisper;
        public OfflineTdnnModelConfig Tdnn;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        public int NumThreads;

        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        [MarshalAs(UnmanagedType.LPStr)]
        public string ModelType;
    }

    // File: scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// NeMo CTC model settings embedded in OfflineModelConfig.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineNemoEncDecCtcModelConfig
    {
        /// Sets the default: empty model path.
        public OfflineNemoEncDecCtcModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }

    // File: scripts/dotnet/OfflineParaformerModelConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Paraformer model settings embedded in OfflineModelConfig.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineParaformerModelConfig
    {
        /// Sets the default: empty model path.
        public OfflineParaformerModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }

    // File: scripts/dotnet/OfflineRecognizer.cs
    // Copyright (c) 2024.5 by 东风破

    /// Managed owner of a native offline recognizer handle.
    /// Dispose (or the finalizer) releases the native object.
    public class OfflineRecognizer : IDisposable
    {
        /// Creates the native recognizer from the given configuration.
        public OfflineRecognizer(OfflineRecognizerConfig config)
        {
            IntPtr h = CreateOfflineRecognizer(ref config);
            _handle = new HandleRef(this, h);
        }

        /// Creates a new stream bound to this recognizer.
        public OfflineStream CreateStream()
        {
            IntPtr p = CreateOfflineStream(_handle.Handle);
            return new OfflineStream(p);
        }

        /// Decodes a single stream.
        public void Decode(OfflineStream stream)
        {
            Decode(_handle.Handle, stream.Handle);
        }

        // The caller should ensure all passed streams are ready for decoding.
        // The element type is spelled out because Select(s => s.Handle) needs
        // the generic IEnumerable to resolve OfflineStream.Handle.
        public void Decode(IEnumerable<OfflineStream> streams)
        {
            IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
            Decode(_handle.Handle, ptrs, ptrs.Length);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineRecognizer()
        {
            Cleanup();
        }

        /// Destroys the native recognizer once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (_handle.Handle == IntPtr.Zero)
            {
                return;
            }

            DestroyOfflineRecognizer(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config);

        [DllImport(Dll.Filename)]
        private static extern void DestroyOfflineRecognizer(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateOfflineStream(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")]
        private static extern void Decode(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")]
        private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
    }

    // File: scripts/dotnet/OfflineRecognizerConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Top-level configuration passed by reference to the native
    /// CreateOfflineRecognizer call.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineRecognizerConfig
    {
        /// Sets the defaults: default-constructed nested configs,
        /// "greedy_search" decoding, 4 active paths, no hotwords file and a
        /// hotwords score of 1.5.
        public OfflineRecognizerConfig()
        {
            FeatConfig = new FeatureConfig();
            ModelConfig = new OfflineModelConfig();
            LmConfig = new OfflineLMConfig();

            DecodingMethod = "greedy_search";
            MaxActivePaths = 4;
            HotwordsFile = "";
            HotwordsScore = 1.5F;
        }

        public FeatureConfig FeatConfig;
        public OfflineModelConfig ModelConfig;
        public OfflineLMConfig LmConfig;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DecodingMethod;

        public int MaxActivePaths;

        [MarshalAs(UnmanagedType.LPStr)]
        public string HotwordsFile;

        public float HotwordsScore;
    }

    // File: scripts/dotnet/OfflineRecognizerResult.cs
    // Copyright (c) 2024.5 by 东风破

    /// Managed copy of the text stored in a native offline recognition result.
    public class OfflineRecognizerResult
    {
        /// Reads the native result struct at handle and copies its UTF-8
        /// text into a managed string.
        public OfflineRecognizerResult(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
            _text = ReadUtf8(impl.Text);
        }

        /// Decodes the NUL-terminated UTF-8 bytes at text.
        /// Returns an empty string when the pointer is null.
        // PtrToStringUTF8() requires .net standard 2.1, so the terminator is
        // located by hand. Marshal.ReadByte avoids the need for unsafe code.
        private static string ReadUtf8(IntPtr text)
        {
            if (text == IntPtr.Zero)
            {
                return string.Empty;
            }

            int length = 0;
            while (Marshal.ReadByte(text, length) != 0)
            {
                ++length;
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(text, stringBuffer, 0, length);
            return Encoding.UTF8.GetString(stringBuffer);
        }

        /// Managed view of the native result struct; only the text pointer is read.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Text;
        }

        private String _text;

        /// The recognized text.
        public String Text => _text;
    }
}

// Next file in this patch: scripts/dotnet/OfflineStream.cs
// Copyright (c) 2024.5 by 东风破

namespace SherpaOnnx
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;

    // File: scripts/dotnet/OfflineStream.cs

    /// Managed owner of a native offline stream handle.
    /// Dispose (or the finalizer) releases the native object.
    public class OfflineStream : IDisposable
    {
        /// Wraps an existing native stream pointer. This object takes over
        /// destroying it.
        public OfflineStream(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        /// Passes all of samples, together with their sample rate, to the
        /// native stream.
        public void AcceptWaveform(int sampleRate, float[] samples)
        {
            AcceptWaveform(Handle, sampleRate, samples, samples.Length);
        }

        /// Fetches the current result of this stream. The native result is
        /// copied into a managed object and then destroyed, so every read of
        /// this property makes a fresh native call.
        public OfflineRecognizerResult Result
        {
            get
            {
                IntPtr h = GetResult(_handle.Handle);
                OfflineRecognizerResult result = new OfflineRecognizerResult(h);
                DestroyResult(h);
                return result;
            }
        }

        ~OfflineStream()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        /// Destroys the native stream once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (Handle == IntPtr.Zero)
            {
                return;
            }

            DestroyOfflineStream(Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        /// Raw native stream pointer; IntPtr.Zero after disposal.
        public IntPtr Handle => _handle.Handle;

        [DllImport(Dll.Filename)]
        private static extern void DestroyOfflineStream(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")]
        private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

        [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")]
        private static extern IntPtr GetResult(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")]
        private static extern void DestroyResult(IntPtr handle);
    }
}

// Next file in this patch: scripts/dotnet/OfflineTdnnModelConfig.cs
// Copyright (c) 2024.5 by 东风破
namespace SherpaOnnx
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;

    // File: scripts/dotnet/OfflineTdnnModelConfig.cs

    /// TDNN model settings embedded in OfflineModelConfig.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTdnnModelConfig
    {
        /// Sets the default: empty model path.
        public OfflineTdnnModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }

    // File: scripts/dotnet/OfflineTransducerModelConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Transducer model settings embedded in OfflineModelConfig.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTransducerModelConfig
    {
        /// Sets the defaults: empty encoder, decoder and joiner paths.
        public OfflineTransducerModelConfig()
        {
            Encoder = "";
            Decoder = "";
            Joiner = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Joiner;
    }

    // File: scripts/dotnet/OfflineTts.cs
    // Copyright (c) 2024.5 by 东风破

    // IntPtr is actually a `const float*` from C++
    // The import that receives this delegate is declared Cdecl, so the
    // delegate is marshaled with the same convention instead of the
    // platform default.
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate void OfflineTtsCallback(IntPtr samples, int n);

    /// Managed owner of a native offline text-to-speech handle.
    /// Dispose (or the finalizer) releases the native object.
    public class OfflineTts : IDisposable
    {
        /// Creates the native TTS object from the given configuration.
        public OfflineTts(OfflineTtsConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineTts(ref config);
            _handle = new HandleRef(this, h);
        }

        /// Synthesizes text and wraps the returned native audio.
        // Note the argument order: the managed API takes (speed, speakerId)
        // while the native call takes (sid, speed).
        public OfflineTtsGeneratedAudio Generate(string text, float speed, int speakerId)
        {
            IntPtr p = SherpaOnnxOfflineTtsGenerate(_handle.Handle, text, speakerId, speed);
            return new OfflineTtsGeneratedAudio(p);
        }

        /// Same as Generate, additionally passing callback to the native call.
        public OfflineTtsGeneratedAudio GenerateWithCallback(string text, float speed, int speakerId, OfflineTtsCallback callback)
        {
            IntPtr p = SherpaOnnxOfflineTtsGenerateWithCallback(_handle.Handle, text, speakerId, speed, callback);
            return new OfflineTtsGeneratedAudio(p);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineTts()
        {
            Cleanup();
        }

        /// Destroys the native TTS object once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (_handle.Handle == IntPtr.Zero)
            {
                return;
            }

            SherpaOnnxDestroyOfflineTts(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        /// Sample rate reported by the native TTS object.
        public int SampleRate
        {
            get
            {
                return SherpaOnnxOfflineTtsSampleRate(_handle.Handle);
            }
        }

        /// Number of speakers reported by the native TTS object.
        public int NumSpeakers
        {
            get
            {
                return SherpaOnnxOfflineTtsNumSpeakers(_handle.Handle);
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineTts(ref OfflineTtsConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineTts(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineTtsSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineTtsNumSpeakers(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed);

        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed, OfflineTtsCallback callback);
    }
}

// Next file in this patch: scripts/dotnet/OfflineTtsConfig.cs
// Copyright (c) 2024.5 by 东风破

namespace SherpaOnnx
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;

    // File: scripts/dotnet/OfflineTtsConfig.cs

    /// Top-level configuration passed by reference to the native
    /// SherpaOnnxCreateOfflineTts call.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsConfig
    {
        /// Sets the defaults: default-constructed model config, empty
        /// RuleFsts and RuleFars, and MaxNumSentences of 1.
        public OfflineTtsConfig()
        {
            Model = new OfflineTtsModelConfig();
            RuleFsts = "";
            MaxNumSentences = 1;
            RuleFars = "";
        }

        public OfflineTtsModelConfig Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFsts;

        public int MaxNumSentences;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFars;
    }

    // File: scripts/dotnet/OfflineTtsGeneratedAudio.cs
    // Copyright (c) 2024.5 by 东风破

    /// Managed owner of a native generated-audio handle.
    /// Dispose (or the finalizer) releases the native object.
    // IDisposable is declared so that the existing Dispose() method can be
    // used with a using statement.
    public class OfflineTtsGeneratedAudio : IDisposable
    {
        /// Wraps an existing native audio pointer. This object takes over
        /// destroying it.
        public OfflineTtsGeneratedAudio(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        /// Writes the samples to filename through the native wave writer.
        /// Returns true when the native call reports status 1.
        public bool SaveToWaveFile(String filename)
        {
            Impl impl = ReadImpl();
            int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, filename);
            return status == 1;
        }

        ~OfflineTtsGeneratedAudio()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        /// Destroys the native audio once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (Handle == IntPtr.Zero)
            {
                return;
            }

            SherpaOnnxDestroyOfflineTtsGeneratedAudio(Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        /// Snapshot of the native struct behind Handle.
        private Impl ReadImpl()
        {
            return (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
        }

        /// Managed view of the native audio struct.
        // Sequential layout: the field order below is what gets marshaled.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Samples;
            public int NumSamples;
            public int SampleRate;
        }

        private HandleRef _handle;

        /// Raw native audio pointer; IntPtr.Zero after disposal.
        public IntPtr Handle => _handle.Handle;

        /// Number of samples stored in the native struct.
        public int NumSamples
        {
            get
            {
                return ReadImpl().NumSamples;
            }
        }

        /// Sample rate stored in the native struct.
        public int SampleRate
        {
            get
            {
                return ReadImpl().SampleRate;
            }
        }

        /// A fresh managed copy of the native samples; each read allocates a
        /// new array.
        public float[] Samples
        {
            get
            {
                Impl impl = ReadImpl();

                float[] samples = new float[impl.NumSamples];
                Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
                return samples;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPStr)] string filename);
    }

    // File: scripts/dotnet/OfflineTtsModelConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Model settings embedded in OfflineTtsConfig.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsModelConfig
    {
        /// Sets the defaults: default-constructed VITS config, one thread,
        /// Debug off (0) and provider "cpu".
        public OfflineTtsModelConfig()
        {
            Vits = new OfflineTtsVitsModelConfig();
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        public OfflineTtsVitsModelConfig Vits;
        public int NumThreads;
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }
}

// Next file in this patch: scripts/dotnet/OfflineTtsVitsModelConfig.cs
// Copyright (c) 2024.5 by 东风破

namespace SherpaOnnx
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;

    // File: scripts/dotnet/OfflineTtsVitsModelConfig.cs

    /// VITS model settings. Every string field is marshaled as LPStr.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsVitsModelConfig
    {
        /// Sets the defaults: all paths empty, NoiseScale 0.667,
        /// NoiseScaleW 0.8 and LengthScale 1.0.
        public OfflineTtsVitsModelConfig()
        {
            // Path-like settings start out empty.
            Model = "";
            Lexicon = "";
            Tokens = "";
            DataDir = "";
            DictDir = "";

            // Numeric synthesis settings.
            NoiseScale = 0.667F;
            NoiseScaleW = 0.8F;
            LengthScale = 1.0F;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lexicon;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DataDir;

        public float NoiseScale;
        public float NoiseScaleW;
        public float LengthScale;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DictDir;
    }

    // File: scripts/dotnet/OfflineWhisperModelConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Whisper model settings. Every string field is marshaled as LPStr.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineWhisperModelConfig
    {
        /// Sets the defaults: empty encoder, decoder and language, task
        /// "transcribe" and TailPaddings of -1.
        public OfflineWhisperModelConfig()
        {
            TailPaddings = -1;
            Task = "transcribe";
            Language = "";
            Decoder = "";
            Encoder = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Language;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Task;

        public int TailPaddings;
    }
}
namespace SherpaOnnx
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;

    // File: scripts/dotnet/OnlineCtcFstDecoderConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// CTC FST decoder settings embedded in OnlineRecognizerConfig.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineCtcFstDecoderConfig
    {
        /// Sets the defaults: empty graph path and MaxActive of 3000.
        public OnlineCtcFstDecoderConfig()
        {
            Graph = "";
            MaxActive = 3000;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Graph;

        public int MaxActive;
    }

    // File: scripts/dotnet/OnlineModelConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Model settings embedded in OnlineRecognizerConfig. It nests one
    /// config struct per supported streaming model family.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineModelConfig
    {
        /// Sets the defaults: default-constructed nested configs, empty
        /// Tokens and ModelType, one thread, provider "cpu", Debug off (0).
        public OnlineModelConfig()
        {
            Transducer = new OnlineTransducerModelConfig();
            Paraformer = new OnlineParaformerModelConfig();
            Zipformer2Ctc = new OnlineZipformer2CtcModelConfig();
            Tokens = "";
            NumThreads = 1;
            Provider = "cpu";
            Debug = 0;
            ModelType = "";
        }

        public OnlineTransducerModelConfig Transducer;
        public OnlineParaformerModelConfig Paraformer;
        public OnlineZipformer2CtcModelConfig Zipformer2Ctc;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        /// Number of threads used to run the neural network model
        public int NumThreads;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        /// Non-zero to print debug information of the model
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string ModelType;
    }

    // File: scripts/dotnet/OnlineParaformerModelConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Streaming paraformer model settings embedded in OnlineModelConfig.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineParaformerModelConfig
    {
        /// Sets the defaults: empty encoder and decoder paths.
        public OnlineParaformerModelConfig()
        {
            Encoder = "";
            Decoder = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;
    }

    // File: scripts/dotnet/OnlineRecognizer.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    // please see
    // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
    // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources

    /// Managed owner of a native streaming recognizer handle.
    /// Dispose (or the finalizer) releases the native object.
    public class OnlineRecognizer : IDisposable
    {
        /// Creates the native recognizer from the given configuration.
        public OnlineRecognizer(OnlineRecognizerConfig config)
        {
            IntPtr h = CreateOnlineRecognizer(ref config);
            _handle = new HandleRef(this, h);
        }

        /// Creates a new stream bound to this recognizer.
        public OnlineStream CreateStream()
        {
            IntPtr p = CreateOnlineStream(_handle.Handle);
            return new OnlineStream(p);
        }

        /// Return true if the passed stream is ready for decoding.
        public bool IsReady(OnlineStream stream)
        {
            return IsReady(_handle.Handle, stream.Handle) != 0;
        }

        /// Return true if an endpoint is detected for this stream.
        /// You probably need to invoke Reset(stream) when this method returns
        /// true.
        public bool IsEndpoint(OnlineStream stream)
        {
            return IsEndpoint(_handle.Handle, stream.Handle) != 0;
        }

        /// You have to ensure that IsReady(stream) returns true before
        /// you call this method
        public void Decode(OnlineStream stream)
        {
            Decode(_handle.Handle, stream.Handle);
        }

        // The caller should ensure all passed streams are ready for decoding.
        // The element type is spelled out because Select(s => s.Handle) needs
        // the generic IEnumerable to resolve OnlineStream.Handle.
        public void Decode(IEnumerable<OnlineStream> streams)
        {
            IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
            Decode(_handle.Handle, ptrs, ptrs.Length);
        }

        /// Fetches the current result for stream. The native result is copied
        /// into a managed object and then destroyed.
        public OnlineRecognizerResult GetResult(OnlineStream stream)
        {
            IntPtr h = GetResult(_handle.Handle, stream.Handle);
            OnlineRecognizerResult result = new OnlineRecognizerResult(h);
            DestroyResult(h);
            return result;
        }

        /// When this method returns, IsEndpoint(stream) will return false.
        public void Reset(OnlineStream stream)
        {
            Reset(_handle.Handle, stream.Handle);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OnlineRecognizer()
        {
            Cleanup();
        }

        /// Destroys the native recognizer once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (_handle.Handle == IntPtr.Zero)
            {
                return;
            }

            DestroyOnlineRecognizer(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        // Imports without an EntryPoint bind by method name, so these private
        // names must not be changed.
        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config);

        [DllImport(Dll.Filename)]
        private static extern void DestroyOnlineRecognizer(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateOnlineStream(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")]
        private static extern int IsReady(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")]
        private static extern void Decode(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")]
        private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);

        [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")]
        private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")]
        private static extern void DestroyResult(IntPtr result);

        [DllImport(Dll.Filename)]
        private static extern void Reset(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename)]
        private static extern int IsEndpoint(IntPtr handle, IntPtr stream);
    }

    // File: scripts/dotnet/OnlineRecognizerConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Top-level configuration passed by reference to the native
    /// CreateOnlineRecognizer call.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineRecognizerConfig
    {
        /// Sets the defaults: default-constructed nested configs,
        /// "greedy_search" decoding, 4 active paths, endpointing disabled,
        /// endpoint rules of 1.2 / 2.4 / 20.0 seconds, no hotwords file and
        /// a hotwords score of 1.5.
        public OnlineRecognizerConfig()
        {
            FeatConfig = new FeatureConfig();
            ModelConfig = new OnlineModelConfig();
            DecodingMethod = "greedy_search";
            MaxActivePaths = 4;
            EnableEndpoint = 0;
            Rule1MinTrailingSilence = 1.2F;
            Rule2MinTrailingSilence = 2.4F;
            Rule3MinUtteranceLength = 20.0F;
            HotwordsFile = "";
            HotwordsScore = 1.5F;
            CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
        }

        public FeatureConfig FeatConfig;
        public OnlineModelConfig ModelConfig;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DecodingMethod;

        /// Used only when decoding_method is modified_beam_search
        /// Example value: 4
        public int MaxActivePaths;

        /// 0 to disable endpoint detection.
        /// A non-zero value to enable endpoint detection.
        public int EnableEndpoint;

        /// An endpoint is detected if trailing silence in seconds is larger than
        /// this value even if nothing has been decoded.
        /// Used only when enable_endpoint is not 0.
        public float Rule1MinTrailingSilence;

        /// An endpoint is detected if trailing silence in seconds is larger than
        /// this value after something that is not blank has been decoded.
        /// Used only when enable_endpoint is not 0.
        public float Rule2MinTrailingSilence;

        /// An endpoint is detected if the utterance in seconds is larger than
        /// this value.
        /// Used only when enable_endpoint is not 0.
        public float Rule3MinUtteranceLength;

        /// Path to the hotwords.
        [MarshalAs(UnmanagedType.LPStr)]
        public string HotwordsFile;

        /// Bonus score for each token in hotwords.
        public float HotwordsScore;

        public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
    }

    // File: scripts/dotnet/OnlineRecognizerResult.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Managed copy of the text, tokens and timestamps stored in a native
    /// streaming recognition result.
    public class OnlineRecognizerResult
    {
        /// Reads the native result struct at handle and copies everything it
        /// references into managed memory.
        public OnlineRecognizerResult(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            _text = ReadUtf8(impl.Text, out _);
            _tokens = ReadTokens(impl.Tokens, impl.Count);
            _timestamps = ReadTimestamps(impl.Timestamps, impl.Count);
        }

        /// Decodes the NUL-terminated UTF-8 bytes at start and reports how
        /// many bytes precede the terminator. A null pointer gives an empty
        /// string and a byte count of 0.
        // PtrToStringUTF8() requires .net standard 2.1, so the terminator is
        // located by hand. Marshal.ReadByte avoids the need for unsafe code.
        private static string ReadUtf8(IntPtr start, out int byteCount)
        {
            byteCount = 0;
            if (start == IntPtr.Zero)
            {
                return string.Empty;
            }

            while (Marshal.ReadByte(start, byteCount) != 0)
            {
                ++byteCount;
            }

            byte[] stringBuffer = new byte[byteCount];
            Marshal.Copy(start, stringBuffer, 0, byteCount);
            return Encoding.UTF8.GetString(stringBuffer);
        }

        /// Reads count NUL-terminated UTF-8 strings stored back to back
        /// starting at tokens. A null pointer gives an empty array.
        private static string[] ReadTokens(IntPtr tokens, int count)
        {
            if (tokens == IntPtr.Zero)
            {
                return Array.Empty<string>();
            }

            string[] result = new string[count];
            IntPtr cursor = tokens;
            for (int i = 0; i < count; i++)
            {
                result[i] = ReadUtf8(cursor, out int byteCount);

                // Skip the token bytes plus its terminator to reach the next token.
                cursor = IntPtr.Add(cursor, byteCount + 1);
            }

            return result;
        }

        /// Copies count floats starting at timestamps. A null pointer gives
        /// an empty array.
        private static float[] ReadTimestamps(IntPtr timestamps, int count)
        {
            if (timestamps == IntPtr.Zero)
            {
                return Array.Empty<float>();
            }

            float[] result = new float[count];
            Marshal.Copy(timestamps, result, 0, count);
            return result;
        }

        /// Managed view of the native result struct.
        // Sequential layout: the field order below is what gets marshaled.
        // TokensArr is never read here, but it must stay so that the
        // fields after it keep their offsets.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Text;
            public IntPtr Tokens;
            public IntPtr TokensArr;
            public IntPtr Timestamps;
            public int Count;
        }

        private String _text;

        /// The recognized text.
        public String Text => _text;

        private String[] _tokens;

        /// The decoded tokens, one entry per native token.
        public String[] Tokens => _tokens;

        private float[] _timestamps;

        /// One timestamp per token, or an empty array when the native result
        /// carries none.
        public float[] Timestamps => _timestamps;
    }

    // File: scripts/dotnet/OnlineStream.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Managed owner of a native streaming-input handle.
    /// Dispose (or the finalizer) releases the native object.
    public class OnlineStream : IDisposable
    {
        /// Wraps an existing native stream pointer. This object takes over
        /// destroying it.
        public OnlineStream(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        /// Passes all of samples, together with their sample rate, to the
        /// native stream.
        public void AcceptWaveform(int sampleRate, float[] samples)
        {
            AcceptWaveform(Handle, sampleRate, samples, samples.Length);
        }

        /// Forwards to the native InputFinished call for this stream.
        public void InputFinished()
        {
            InputFinished(Handle);
        }

        ~OnlineStream()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        /// Destroys the native stream once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (Handle == IntPtr.Zero)
            {
                return;
            }

            DestroyOnlineStream(Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        /// Raw native stream pointer; IntPtr.Zero after disposal.
        public IntPtr Handle => _handle.Handle;

        // Imports without an EntryPoint bind by method name, so these private
        // names must not be changed.
        [DllImport(Dll.Filename)]
        private static extern void DestroyOnlineStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

        [DllImport(Dll.Filename)]
        private static extern void InputFinished(IntPtr handle);
    }

    // File: scripts/dotnet/OnlineTransducerModelConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Streaming transducer model settings embedded in OnlineModelConfig.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineTransducerModelConfig
    {
        /// Sets the defaults: empty encoder, decoder and joiner paths.
        public OnlineTransducerModelConfig()
        {
            Encoder = "";
            Decoder = "";
            Joiner = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Joiner;
    }

    // File: scripts/dotnet/OnlineZipformer2CtcModelConfig.cs
    // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
    // Copyright (c) 2023 by manyeyes
    // Copyright (c) 2024.5 by 东风破

    /// Zipformer2 CTC model settings embedded in OnlineModelConfig.
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineZipformer2CtcModelConfig
    {
        /// Sets the default: empty model path.
        public OnlineZipformer2CtcModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }

    // File: scripts/dotnet/SpeakerEmbeddingExtractor.cs
    // Copyright (c) 2024.5 by 东风破

    /// Managed owner of a native speaker-embedding extractor handle.
    /// Dispose (or the finalizer) releases the native object.
    public class SpeakerEmbeddingExtractor : IDisposable
    {
        /// Creates the native extractor from the given configuration.
        public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config)
        {
            IntPtr h = SherpaOnnxCreateSpeakerEmbeddingExtractor(ref config);
            _handle = new HandleRef(this, h);
        }

        /// Creates a new input stream bound to this extractor.
        public OnlineStream CreateStream()
        {
            IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(_handle.Handle);
            return new OnlineStream(p);
        }

        /// Returns true when the native IsReady call reports a non-zero
        /// value for stream.
        public bool IsReady(OnlineStream stream)
        {
            return SherpaOnnxSpeakerEmbeddingExtractorIsReady(_handle.Handle, stream.Handle) != 0;
        }

        /// Computes the embedding for stream and returns a managed copy of
        /// Dim floats. The native buffer is destroyed before returning.
        public float[] Compute(OnlineStream stream)
        {
            IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(_handle.Handle, stream.Handle);

            int dim = Dim;
            float[] ans = new float[dim];
            Marshal.Copy(p, ans, 0, dim);

            SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p);

            return ans;
        }

        /// Embedding dimension reported by the native extractor.
        public int Dim
        {
            get
            {
                return SherpaOnnxSpeakerEmbeddingExtractorDim(_handle.Handle);
            }
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~SpeakerEmbeddingExtractor()
        {
            Cleanup();
        }

        /// Destroys the native extractor once; later calls are no-ops.
        private void Cleanup()
        {
            // A zero handle means there is nothing (left) to destroy. This
            // keeps a second Dispose() from handing a null pointer to native code.
            if (_handle.Handle == IntPtr.Zero)
            {
                return;
            }

            SherpaOnnxDestroySpeakerEmbeddingExtractor(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingExtractor(ref SpeakerEmbeddingExtractorConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroySpeakerEmbeddingExtractor(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingExtractorDim(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorCreateStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingExtractorIsReady(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(IntPtr p);
    }

    // File: scripts/dotnet/SpeakerEmbeddingExtractorConfig.cs
    // Copyright (c) 2024.5 by 东风破

    /// Configuration passed by reference to the native
    /// SherpaOnnxCreateSpeakerEmbeddingExtractor call.
    // Sequential layout: the field order below is what gets marshaled.
    [StructLayout(LayoutKind.Sequential)]
    public struct SpeakerEmbeddingExtractorConfig
    {
        /// Sets the defaults: empty model path, one thread, Debug off (0)
        /// and provider "cpu".
        public SpeakerEmbeddingExtractorConfig()
        {
            Model = "";
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        public int NumThreads;
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }
}

// Next file in this patch: scripts/dotnet/SpeakerEmbeddingManager.cs (new file mode 100644)
00000000..54814c72 --- /dev/null +++ b/scripts/dotnet/SpeakerEmbeddingManager.cs @@ -0,0 +1,189 @@ +/// Copyright (c) 2024.5 by 东风破 + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using System; + +namespace SherpaOnnx +{ + public class SpeakerEmbeddingManager : IDisposable + { + public SpeakerEmbeddingManager(int dim) + { + IntPtr h = SherpaOnnxCreateSpeakerEmbeddingManager(dim); + _handle = new HandleRef(this, h); + this._dim = dim; + } + + public bool Add(string name, float[] v) + { + return SherpaOnnxSpeakerEmbeddingManagerAdd(_handle.Handle, name, v) == 1; + } + + public bool Add(string name, ICollection v_list) + { + int n = v_list.Count; + float[] v = new float[n * _dim]; + int i = 0; + foreach (var item in v_list) + { + item.CopyTo(v, i); + i += _dim; + } + + return SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(_handle.Handle, name, v, n) == 1; + } + + public bool Remove(string name) + { + return SherpaOnnxSpeakerEmbeddingManagerRemove(_handle.Handle, name) == 1; + } + + public string Search(float[] v, float threshold) + { + IntPtr p = SherpaOnnxSpeakerEmbeddingManagerSearch(_handle.Handle, v, threshold); + + string s = ""; + int length = 0; + + unsafe + { + byte* b = (byte*)p; + if (b != null) + { + while (*b != 0) + { + ++b; + length += 1; + } + } + } + + if (length > 0) + { + byte[] stringBuffer = new byte[length]; + Marshal.Copy(p, stringBuffer, 0, length); + s = Encoding.UTF8.GetString(stringBuffer); + } + + SherpaOnnxSpeakerEmbeddingManagerFreeSearch(p); + + return s; + } + + public bool Verify(string name, float[] v, float threshold) + { + return SherpaOnnxSpeakerEmbeddingManagerVerify(_handle.Handle, name, v, threshold) == 1; + } + + public bool Contains(string name) + { + return SherpaOnnxSpeakerEmbeddingManagerContains(_handle.Handle, name) == 1; + } + + public string[] GetAllSpeakers() + { + if (NumSpeakers == 0) + { + return new string[] { }; + } + + IntPtr names = 
SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(_handle.Handle); + + string[] ans = new string[NumSpeakers]; + + unsafe + { + byte** p = (byte**)names; + for (int i = 0; i != NumSpeakers; i++) + { + int length = 0; + byte* s = p[i]; + while (*s != 0) + { + ++s; + length += 1; + } + byte[] stringBuffer = new byte[length]; + Marshal.Copy((IntPtr)p[i], stringBuffer, 0, length); + ans[i] = Encoding.UTF8.GetString(stringBuffer); + } + } + + SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names); + + return ans; + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + ~SpeakerEmbeddingManager() + { + Cleanup(); + } + + private void Cleanup() + { + SherpaOnnxDestroySpeakerEmbeddingManager(_handle.Handle); + + // Don't permit the handle to be used again. + _handle = new HandleRef(this, IntPtr.Zero); + } + + public int NumSpeakers + { + get + { + return SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(_handle.Handle); + } + } + + private HandleRef _handle; + private int _dim; + + + [DllImport(Dll.Filename)] + private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingManager(int dim); + + [DllImport(Dll.Filename)] + private static extern void SherpaOnnxDestroySpeakerEmbeddingManager(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern int SherpaOnnxSpeakerEmbeddingManagerAdd(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v); + + [DllImport(Dll.Filename)] + private static extern int SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, int n); + + [DllImport(Dll.Filename)] + private static extern int SherpaOnnxSpeakerEmbeddingManagerRemove(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name); + + [DllImport(Dll.Filename)] + private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerSearch(IntPtr handle, float[] v, float threshold); + + 
[DllImport(Dll.Filename)] + private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(IntPtr p); + + [DllImport(Dll.Filename)] + private static extern int SherpaOnnxSpeakerEmbeddingManagerVerify(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, float threshold); + + [DllImport(Dll.Filename)] + private static extern int SherpaOnnxSpeakerEmbeddingManagerContains(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name); + + [DllImport(Dll.Filename)] + private static extern int SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(IntPtr names); + } +} \ No newline at end of file diff --git a/scripts/dotnet/SpokenLanguageIdentification.cs b/scripts/dotnet/SpokenLanguageIdentification.cs new file mode 100644 index 00000000..a125d0c5 --- /dev/null +++ b/scripts/dotnet/SpokenLanguageIdentification.cs @@ -0,0 +1,71 @@ +/// Copyright (c) 2024.5 by 东风破 + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using System; + +namespace SherpaOnnx +{ + public class SpokenLanguageIdentification : IDisposable +{ + public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) + { + IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config); + _handle = new HandleRef(this, h); + } + + public OfflineStream CreateStream() + { + IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle); + return new OfflineStream(p); + } + + public SpokenLanguageIdentificationResult Compute(OfflineStream stream) + { + IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle); + SpokenLanguageIdentificationResult result = new SpokenLanguageIdentificationResult(h); + 
SherpaOnnxDestroySpokenLanguageIdentificationResult(h); + return result; + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + ~SpokenLanguageIdentification() + { + Cleanup(); + } + + private void Cleanup() + { + SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle); + + // Don't permit the handle to be used again. + _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + + [DllImport(Dll.Filename)] + private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config); + + [DllImport(Dll.Filename)] + private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename)] + private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle); +} +} \ No newline at end of file diff --git a/scripts/dotnet/SpokenLanguageIdentificationConfig.cs b/scripts/dotnet/SpokenLanguageIdentificationConfig.cs new file mode 100644 index 00000000..517a7fb2 --- /dev/null +++ b/scripts/dotnet/SpokenLanguageIdentificationConfig.cs @@ -0,0 +1,29 @@ +/// Copyright (c) 2024.5 by 东风破 + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using System; + +namespace SherpaOnnx +{ + public struct SpokenLanguageIdentificationConfig + { + public SpokenLanguageIdentificationConfig() + { + Whisper = new SpokenLanguageIdentificationWhisperConfig(); + NumThreads = 1; + Debug = 0; + Provider = "cpu"; + } + public SpokenLanguageIdentificationWhisperConfig Whisper; + + public int NumThreads; + public int 
Debug; + + [MarshalAs(UnmanagedType.LPStr)] + public string Provider; + } + +} \ No newline at end of file diff --git a/scripts/dotnet/SpokenLanguageIdentificationResult.cs b/scripts/dotnet/SpokenLanguageIdentificationResult.cs new file mode 100644 index 00000000..f62d1735 --- /dev/null +++ b/scripts/dotnet/SpokenLanguageIdentificationResult.cs @@ -0,0 +1,46 @@ +/// Copyright (c) 2024.5 by 东风破 + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using System; + +namespace SherpaOnnx +{ + public class SpokenLanguageIdentificationResult + { + public SpokenLanguageIdentificationResult(IntPtr handle) + { + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); + + // PtrToStringUTF8() requires .net standard 2.1 + // _text = Marshal.PtrToStringUTF8(impl.Text); + + int length = 0; + + unsafe + { + byte* buffer = (byte*)impl.Lang; + while (*buffer != 0) + { + ++buffer; + length += 1; + } + } + + byte[] stringBuffer = new byte[length]; + Marshal.Copy(impl.Lang, stringBuffer, 0, length); + _lang = Encoding.UTF8.GetString(stringBuffer); + } + + [StructLayout(LayoutKind.Sequential)] + struct Impl + { + public IntPtr Lang; + } + + private String _lang; + public String Lang => _lang; + } +} \ No newline at end of file diff --git a/scripts/dotnet/SpokenLanguageIdentificationWhisperConfig.cs b/scripts/dotnet/SpokenLanguageIdentificationWhisperConfig.cs new file mode 100644 index 00000000..07866d08 --- /dev/null +++ b/scripts/dotnet/SpokenLanguageIdentificationWhisperConfig.cs @@ -0,0 +1,30 @@ +/// Copyright (c) 2024.5 by 东风破 + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using System; + +namespace SherpaOnnx +{ + [StructLayout(LayoutKind.Sequential)] + public struct SpokenLanguageIdentificationWhisperConfig + { + public SpokenLanguageIdentificationWhisperConfig() + { + Encoder = ""; + Decoder = ""; + TailPaddings = -1; + } + + 
[MarshalAs(UnmanagedType.LPStr)] + public string Encoder; + + [MarshalAs(UnmanagedType.LPStr)] + public string Decoder; + + public int TailPaddings; + } + +} \ No newline at end of file diff --git a/scripts/dotnet/offline.cs b/scripts/dotnet/offline.cs deleted file mode 100644 index 59f1d663..00000000 --- a/scripts/dotnet/offline.cs +++ /dev/null @@ -1,991 +0,0 @@ -/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -/// Copyright (c) 2023 by manyeyes - -using System.Linq; -using System.Collections.Generic; -using System.Runtime.InteropServices; -using System.Text; -using System; - -namespace SherpaOnnx -{ - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineTtsVitsModelConfig - { - public OfflineTtsVitsModelConfig() - { - Model = ""; - Lexicon = ""; - Tokens = ""; - DataDir = ""; - - NoiseScale = 0.667F; - NoiseScaleW = 0.8F; - LengthScale = 1.0F; - - DictDir = ""; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - - [MarshalAs(UnmanagedType.LPStr)] - public string Lexicon; - - [MarshalAs(UnmanagedType.LPStr)] - public string Tokens; - - [MarshalAs(UnmanagedType.LPStr)] - public string DataDir; - - public float NoiseScale; - public float NoiseScaleW; - public float LengthScale; - - [MarshalAs(UnmanagedType.LPStr)] - public string DictDir; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineTtsModelConfig - { - public OfflineTtsModelConfig() - { - Vits = new OfflineTtsVitsModelConfig(); - NumThreads = 1; - Debug = 0; - Provider = "cpu"; - } - - public OfflineTtsVitsModelConfig Vits; - public int NumThreads; - public int Debug; - [MarshalAs(UnmanagedType.LPStr)] - public string Provider; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineTtsConfig - { - public OfflineTtsConfig() - { - Model = new OfflineTtsModelConfig(); - RuleFsts = ""; - MaxNumSentences = 1; - RuleFars = ""; - } - public OfflineTtsModelConfig Model; - - [MarshalAs(UnmanagedType.LPStr)] - public string RuleFsts; - - public 
int MaxNumSentences; - - [MarshalAs(UnmanagedType.LPStr)] - public string RuleFars; - } - - public class OfflineTtsGeneratedAudio - { - public OfflineTtsGeneratedAudio(IntPtr p) - { - _handle = new HandleRef(this, p); - } - - public bool SaveToWaveFile(String filename) - { - Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl)); - int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, filename); - return status == 1; - } - - ~OfflineTtsGeneratedAudio() - { - Cleanup(); - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - private void Cleanup() - { - SherpaOnnxDestroyOfflineTtsGeneratedAudio(Handle); - - // Don't permit the handle to be used again. - _handle = new HandleRef(this, IntPtr.Zero); - } - - [StructLayout(LayoutKind.Sequential)] - struct Impl - { - public IntPtr Samples; - public int NumSamples; - public int SampleRate; - } - - private HandleRef _handle; - public IntPtr Handle => _handle.Handle; - - public int NumSamples - { - get - { - Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl)); - return impl.NumSamples; - } - } - - public int SampleRate - { - get - { - Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl)); - return impl.SampleRate; - } - } - - public float[] Samples - { - get - { - Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl)); - - float[] samples = new float[impl.NumSamples]; - Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples); - return samples; - } - } - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPStr)] string filename); - } - - // IntPtr is actuallly a `const float*` from C++ - public delegate void OfflineTtsCallback(IntPtr samples, int n); - - 
public class OfflineTts : IDisposable - { - public OfflineTts(OfflineTtsConfig config) - { - IntPtr h = SherpaOnnxCreateOfflineTts(ref config); - _handle = new HandleRef(this, h); - } - - public OfflineTtsGeneratedAudio Generate(String text, float speed, int speakerId) - { - IntPtr p = SherpaOnnxOfflineTtsGenerate(_handle.Handle, text, speakerId, speed); - return new OfflineTtsGeneratedAudio(p); - } - - public OfflineTtsGeneratedAudio GenerateWithCallback(String text, float speed, int speakerId, OfflineTtsCallback callback) - { - IntPtr p = SherpaOnnxOfflineTtsGenerateWithCallback(_handle.Handle, text, speakerId, speed, callback); - return new OfflineTtsGeneratedAudio(p); - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - ~OfflineTts() - { - Cleanup(); - } - - private void Cleanup() - { - SherpaOnnxDestroyOfflineTts(_handle.Handle); - - // Don't permit the handle to be used again. 
- _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - - public int SampleRate - { - get - { - return SherpaOnnxOfflineTtsSampleRate(_handle.Handle); - } - } - - public int NumSpeakers - { - get - { - return SherpaOnnxOfflineTtsNumSpeakers(_handle.Handle); - } - } - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxCreateOfflineTts(ref OfflineTtsConfig config); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxDestroyOfflineTts(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxOfflineTtsSampleRate(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxOfflineTtsNumSpeakers(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed); - - [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)] - private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed, OfflineTtsCallback callback); - } - - - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineTransducerModelConfig - { - public OfflineTransducerModelConfig() - { - Encoder = ""; - Decoder = ""; - Joiner = ""; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Encoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Decoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Joiner; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineParaformerModelConfig - { - public OfflineParaformerModelConfig() - { - Model = ""; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineNemoEncDecCtcModelConfig - { - public OfflineNemoEncDecCtcModelConfig() - { - Model = ""; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - } - - 
[StructLayout(LayoutKind.Sequential)] - public struct OfflineWhisperModelConfig - { - public OfflineWhisperModelConfig() - { - Encoder = ""; - Decoder = ""; - Language = ""; - Task = "transcribe"; - TailPaddings = -1; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Encoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Decoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Language; - - [MarshalAs(UnmanagedType.LPStr)] - public string Task; - - public int TailPaddings; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineTdnnModelConfig - { - public OfflineTdnnModelConfig() - { - Model = ""; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineLMConfig - { - public OfflineLMConfig() - { - Model = ""; - Scale = 0.5F; - } - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - - public float Scale; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineModelConfig - { - public OfflineModelConfig() - { - Transducer = new OfflineTransducerModelConfig(); - Paraformer = new OfflineParaformerModelConfig(); - NeMoCtc = new OfflineNemoEncDecCtcModelConfig(); - Whisper = new OfflineWhisperModelConfig(); - Tdnn = new OfflineTdnnModelConfig(); - Tokens = ""; - NumThreads = 1; - Debug = 0; - Provider = "cpu"; - ModelType = ""; - } - public OfflineTransducerModelConfig Transducer; - public OfflineParaformerModelConfig Paraformer; - public OfflineNemoEncDecCtcModelConfig NeMoCtc; - public OfflineWhisperModelConfig Whisper; - public OfflineTdnnModelConfig Tdnn; - - [MarshalAs(UnmanagedType.LPStr)] - public string Tokens; - - public int NumThreads; - - public int Debug; - - [MarshalAs(UnmanagedType.LPStr)] - public string Provider; - - [MarshalAs(UnmanagedType.LPStr)] - public string ModelType; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OfflineRecognizerConfig - { - public OfflineRecognizerConfig() - { - FeatConfig = new 
FeatureConfig(); - ModelConfig = new OfflineModelConfig(); - LmConfig = new OfflineLMConfig(); - - DecodingMethod = "greedy_search"; - MaxActivePaths = 4; - HotwordsFile = ""; - HotwordsScore = 1.5F; - - } - public FeatureConfig FeatConfig; - public OfflineModelConfig ModelConfig; - public OfflineLMConfig LmConfig; - - [MarshalAs(UnmanagedType.LPStr)] - public string DecodingMethod; - - public int MaxActivePaths; - - [MarshalAs(UnmanagedType.LPStr)] - public string HotwordsFile; - - public float HotwordsScore; - } - - public class OfflineRecognizerResult - { - public OfflineRecognizerResult(IntPtr handle) - { - Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); - - // PtrToStringUTF8() requires .net standard 2.1 - // _text = Marshal.PtrToStringUTF8(impl.Text); - - int length = 0; - - unsafe - { - byte* buffer = (byte*)impl.Text; - while (*buffer != 0) - { - ++buffer; - length += 1; - } - } - - byte[] stringBuffer = new byte[length]; - Marshal.Copy(impl.Text, stringBuffer, 0, length); - _text = Encoding.UTF8.GetString(stringBuffer); - } - - [StructLayout(LayoutKind.Sequential)] - struct Impl - { - public IntPtr Text; - } - - private String _text; - public String Text => _text; - } - - public class OfflineStream : IDisposable - { - public OfflineStream(IntPtr p) - { - _handle = new HandleRef(this, p); - } - - public void AcceptWaveform(int sampleRate, float[] samples) - { - AcceptWaveform(Handle, sampleRate, samples, samples.Length); - } - - public OfflineRecognizerResult Result - { - get - { - IntPtr h = GetResult(_handle.Handle); - OfflineRecognizerResult result = new OfflineRecognizerResult(h); - DestroyResult(h); - return result; - } - } - - ~OfflineStream() - { - Cleanup(); - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - private void Cleanup() - { - DestroyOfflineStream(Handle); - - // Don't permit the handle to be used again. 
- _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - public IntPtr Handle => _handle.Handle; - - [DllImport(Dll.Filename)] - private static extern void DestroyOfflineStream(IntPtr handle); - - [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")] - private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n); - - [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")] - private static extern IntPtr GetResult(IntPtr handle); - - [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")] - private static extern void DestroyResult(IntPtr handle); - } - - public class OfflineRecognizer : IDisposable - { - public OfflineRecognizer(OfflineRecognizerConfig config) - { - IntPtr h = CreateOfflineRecognizer(ref config); - _handle = new HandleRef(this, h); - } - - public OfflineStream CreateStream() - { - IntPtr p = CreateOfflineStream(_handle.Handle); - return new OfflineStream(p); - } - - public void Decode(OfflineStream stream) - { - Decode(_handle.Handle, stream.Handle); - } - - // The caller should ensure all passed streams are ready for decoding. - public void Decode(IEnumerable streams) - { - IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); - Decode(_handle.Handle, ptrs, ptrs.Length); - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - ~OfflineRecognizer() - { - Cleanup(); - } - - private void Cleanup() - { - DestroyOfflineRecognizer(_handle.Handle); - - // Don't permit the handle to be used again. 
- _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - - [DllImport(Dll.Filename)] - private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config); - - [DllImport(Dll.Filename)] - private static extern void DestroyOfflineRecognizer(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr CreateOfflineStream(IntPtr handle); - - [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")] - private static extern void Decode(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")] - private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); - } - - [StructLayout(LayoutKind.Sequential)] - public struct SpeakerEmbeddingExtractorConfig - { - public SpeakerEmbeddingExtractorConfig() - { - Model = ""; - NumThreads = 1; - Debug = 0; - Provider = "cpu"; - } - - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - - public int NumThreads; - public int Debug; - - [MarshalAs(UnmanagedType.LPStr)] - public string Provider; - } - - public class SpeakerEmbeddingExtractor : IDisposable - { - public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config) - { - IntPtr h = SherpaOnnxCreateSpeakerEmbeddingExtractor(ref config); - _handle = new HandleRef(this, h); - } - - public OnlineStream CreateStream() - { - IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(_handle.Handle); - return new OnlineStream(p); - } - - public bool IsReady(OnlineStream stream) - { - return SherpaOnnxSpeakerEmbeddingExtractorIsReady(_handle.Handle, stream.Handle) != 0; - } - - public float[] Compute(OnlineStream stream) - { - IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(_handle.Handle, stream.Handle); - - int dim = Dim; - float[] ans = new float[dim]; - Marshal.Copy(p, ans, 0, dim); - - SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p); - - return ans; - } - - public int Dim - { - get - { - return 
SherpaOnnxSpeakerEmbeddingExtractorDim(_handle.Handle); - } - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - ~SpeakerEmbeddingExtractor() - { - Cleanup(); - } - - private void Cleanup() - { - SherpaOnnxDestroySpeakerEmbeddingExtractor(_handle.Handle); - - // Don't permit the handle to be used again. - _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingExtractor(ref SpeakerEmbeddingExtractorConfig config); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxDestroySpeakerEmbeddingExtractor(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingExtractorDim(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorCreateStream(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingExtractorIsReady(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(IntPtr p); - } - - [StructLayout(LayoutKind.Sequential)] - public struct SpokenLanguageIdentificationWhisperConfig - { - public SpokenLanguageIdentificationWhisperConfig() - { - Encoder = ""; - Decoder = ""; - TailPaddings = -1; - } - - [MarshalAs(UnmanagedType.LPStr)] - public string Encoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Decoder; - - public int TailPaddings; - } - - public struct SpokenLanguageIdentificationConfig - { - public SpokenLanguageIdentificationConfig() - { - Whisper = new SpokenLanguageIdentificationWhisperConfig(); - NumThreads = 1; - Debug = 0; - Provider = "cpu"; - 
} - public SpokenLanguageIdentificationWhisperConfig Whisper; - - public int NumThreads; - public int Debug; - - [MarshalAs(UnmanagedType.LPStr)] - public string Provider; - } - - public class SpeakerEmbeddingManager : IDisposable - { - public SpeakerEmbeddingManager(int dim) - { - IntPtr h = SherpaOnnxCreateSpeakerEmbeddingManager(dim); - _handle = new HandleRef(this, h); - this._dim = dim; - } - - public bool Add(string name, float[] v) - { - return SherpaOnnxSpeakerEmbeddingManagerAdd(_handle.Handle, name, v) == 1; - } - - public bool Add(string name, ICollection v_list) - { - int n = v_list.Count; - float[] v = new float[n * _dim]; - int i = 0; - foreach (var item in v_list) - { - item.CopyTo(v, i); - i += _dim; - } - - return SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(_handle.Handle, name, v, n) == 1; - } - - public bool Remove(string name) - { - return SherpaOnnxSpeakerEmbeddingManagerRemove(_handle.Handle, name) == 1; - } - - public string Search(float[] v, float threshold) - { - IntPtr p = SherpaOnnxSpeakerEmbeddingManagerSearch(_handle.Handle, v, threshold); - - string s = ""; - int length = 0; - - unsafe - { - byte* b = (byte*)p; - if (b != null) - { - while (*b != 0) - { - ++b; - length += 1; - } - } - } - - if (length > 0) - { - byte[] stringBuffer = new byte[length]; - Marshal.Copy(p, stringBuffer, 0, length); - s = Encoding.UTF8.GetString(stringBuffer); - } - - SherpaOnnxSpeakerEmbeddingManagerFreeSearch(p); - - return s; - } - - public bool Verify(string name, float[] v, float threshold) - { - return SherpaOnnxSpeakerEmbeddingManagerVerify(_handle.Handle, name, v, threshold) == 1; - } - - public bool Contains(string name) - { - return SherpaOnnxSpeakerEmbeddingManagerContains(_handle.Handle, name) == 1; - } - - public string[] GetAllSpeakers() - { - if (NumSpeakers == 0) - { - return new string[] { }; - } - - IntPtr names = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(_handle.Handle); - - string[] ans = new string[NumSpeakers]; - - unsafe - 
{ - byte** p = (byte**)names; - for (int i = 0; i != NumSpeakers; i++) - { - int length = 0; - byte* s = p[i]; - while (*s != 0) - { - ++s; - length += 1; - } - byte[] stringBuffer = new byte[length]; - Marshal.Copy((IntPtr)p[i], stringBuffer, 0, length); - ans[i] = Encoding.UTF8.GetString(stringBuffer); - } - } - - SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names); - - return ans; - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - ~SpeakerEmbeddingManager() - { - Cleanup(); - } - - private void Cleanup() - { - SherpaOnnxDestroySpeakerEmbeddingManager(_handle.Handle); - - // Don't permit the handle to be used again. - _handle = new HandleRef(this, IntPtr.Zero); - } - - public int NumSpeakers - { - get - { - return SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(_handle.Handle); - } - } - - private HandleRef _handle; - private int _dim; - - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingManager(int dim); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxDestroySpeakerEmbeddingManager(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingManagerAdd(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, int n); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingManagerRemove(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerSearch(IntPtr handle, float[] v, float threshold); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(IntPtr p); - - [DllImport(Dll.Filename)] - 
private static extern int SherpaOnnxSpeakerEmbeddingManagerVerify(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, float threshold); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingManagerContains(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name); - - [DllImport(Dll.Filename)] - private static extern int SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(IntPtr names); - } - - public class SpokenLanguageIdentificationResult - { - public SpokenLanguageIdentificationResult(IntPtr handle) - { - Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); - - // PtrToStringUTF8() requires .net standard 2.1 - // _text = Marshal.PtrToStringUTF8(impl.Text); - - int length = 0; - - unsafe - { - byte* buffer = (byte*)impl.Lang; - while (*buffer != 0) - { - ++buffer; - length += 1; - } - } - - byte[] stringBuffer = new byte[length]; - Marshal.Copy(impl.Lang, stringBuffer, 0, length); - _lang = Encoding.UTF8.GetString(stringBuffer); - } - - [StructLayout(LayoutKind.Sequential)] - struct Impl - { - public IntPtr Lang; - } - - private String _lang; - public String Lang => _lang; - } - - public class SpokenLanguageIdentification : IDisposable - { - public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) - { - IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config); - _handle = new HandleRef(this, h); - } - - public OfflineStream CreateStream() - { - IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle); - return new OfflineStream(p); - } - - public SpokenLanguageIdentificationResult Compute(OfflineStream stream) - { - IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, 
stream.Handle); - SpokenLanguageIdentificationResult result = new SpokenLanguageIdentificationResult(h); - SherpaOnnxDestroySpokenLanguageIdentificationResult(h); - return result; - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - ~SpokenLanguageIdentification() - { - Cleanup(); - } - - private void Cleanup() - { - SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle); - - // Don't permit the handle to be used again. - _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename)] - private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle); - } -} diff --git a/scripts/dotnet/online.cs b/scripts/dotnet/online.cs deleted file mode 100644 index a9dd95de..00000000 --- a/scripts/dotnet/online.cs +++ /dev/null @@ -1,447 +0,0 @@ -/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -/// Copyright (c) 2023 by manyeyes - -using System.Collections.Generic; -using System.Linq; -using System.Runtime.InteropServices; -using System.Text; -using System; - -namespace SherpaOnnx -{ - internal static class Dll - { - public const string Filename = "sherpa-onnx-c-api"; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OnlineTransducerModelConfig - { - public OnlineTransducerModelConfig() - { - Encoder = ""; - Decoder = ""; 
- Joiner = ""; - } - - [MarshalAs(UnmanagedType.LPStr)] - public string Encoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Decoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Joiner; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OnlineParaformerModelConfig - { - public OnlineParaformerModelConfig() - { - Encoder = ""; - Decoder = ""; - } - - [MarshalAs(UnmanagedType.LPStr)] - public string Encoder; - - [MarshalAs(UnmanagedType.LPStr)] - public string Decoder; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OnlineZipformer2CtcModelConfig - { - public OnlineZipformer2CtcModelConfig() - { - Model = ""; - } - - [MarshalAs(UnmanagedType.LPStr)] - public string Model; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OnlineModelConfig - { - public OnlineModelConfig() - { - Transducer = new OnlineTransducerModelConfig(); - Paraformer = new OnlineParaformerModelConfig(); - Zipformer2Ctc = new OnlineZipformer2CtcModelConfig(); - Tokens = ""; - NumThreads = 1; - Provider = "cpu"; - Debug = 0; - ModelType = ""; - } - - public OnlineTransducerModelConfig Transducer; - public OnlineParaformerModelConfig Paraformer; - public OnlineZipformer2CtcModelConfig Zipformer2Ctc; - - [MarshalAs(UnmanagedType.LPStr)] - public string Tokens; - - /// Number of threads used to run the neural network model - public int NumThreads; - - [MarshalAs(UnmanagedType.LPStr)] - public string Provider; - - /// true to print debug information of the model - public int Debug; - - [MarshalAs(UnmanagedType.LPStr)] - public string ModelType; - } - - /// It expects 16 kHz 16-bit single channel wave format. - [StructLayout(LayoutKind.Sequential)] - public struct FeatureConfig - { - public FeatureConfig() - { - SampleRate = 16000; - FeatureDim = 80; - } - /// Sample rate of the input data. MUST match the one expected - /// by the model. For instance, it should be 16000 for models provided - /// by us. 
- public int SampleRate; - - /// Feature dimension of the model. - /// For instance, it should be 80 for models provided by us. - public int FeatureDim; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OnlineCtcFstDecoderConfig - { - public OnlineCtcFstDecoderConfig() - { - Graph = ""; - MaxActive = 3000; - } - - [MarshalAs(UnmanagedType.LPStr)] - public string Graph; - - public int MaxActive; - } - - [StructLayout(LayoutKind.Sequential)] - public struct OnlineRecognizerConfig - { - public OnlineRecognizerConfig() - { - FeatConfig = new FeatureConfig(); - ModelConfig = new OnlineModelConfig(); - DecodingMethod = "greedy_search"; - MaxActivePaths = 4; - EnableEndpoint = 0; - Rule1MinTrailingSilence = 1.2F; - Rule2MinTrailingSilence = 2.4F; - Rule3MinUtteranceLength = 20.0F; - HotwordsFile = ""; - HotwordsScore = 1.5F; - CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig(); - } - public FeatureConfig FeatConfig; - public OnlineModelConfig ModelConfig; - - [MarshalAs(UnmanagedType.LPStr)] - public string DecodingMethod; - - /// Used only when decoding_method is modified_beam_search - /// Example value: 4 - public int MaxActivePaths; - - /// 0 to disable endpoint detection. - /// A non-zero value to enable endpoint detection. - public int EnableEndpoint; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value even if nothing has been decoded. - /// Used only when enable_endpoint is not 0. - public float Rule1MinTrailingSilence; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value after something that is not blank has been decoded. - /// Used only when enable_endpoint is not 0. - public float Rule2MinTrailingSilence; - - /// An endpoint is detected if the utterance in seconds is larger than - /// this value. - /// Used only when enable_endpoint is not 0. - public float Rule3MinUtteranceLength; - - /// Path to the hotwords. 
- [MarshalAs(UnmanagedType.LPStr)] - public string HotwordsFile; - - /// Bonus score for each token in hotwords. - public float HotwordsScore; - - public OnlineCtcFstDecoderConfig CtcFstDecoderConfig; - } - - public class OnlineRecognizerResult - { - public OnlineRecognizerResult(IntPtr handle) - { - Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); - // PtrToStringUTF8() requires .net standard 2.1 - // _text = Marshal.PtrToStringUTF8(impl.Text); - - int length = 0; - - unsafe - { - byte* buffer = (byte*)impl.Text; - while (*buffer != 0) - { - ++buffer; - length += 1; - } - } - - byte[] stringBuffer = new byte[length]; - Marshal.Copy(impl.Text, stringBuffer, 0, length); - _text = Encoding.UTF8.GetString(stringBuffer); - - _tokens = new String[impl.Count]; - - unsafe - { - byte* buf = (byte*)impl.Tokens; - for (int i = 0; i < impl.Count; i++) - { - length = 0; - byte* start = buf; - while (*buf != 0) - { - ++buf; - length += 1; - } - ++buf; - - stringBuffer = new byte[length]; - fixed (byte* pTarget = stringBuffer) - { - for (int k = 0; k < length; k++) - { - pTarget[k] = start[k]; - } - } - - _tokens[i] = Encoding.UTF8.GetString(stringBuffer); - } - } - - unsafe - { - float* t = (float*)impl.Timestamps; - if (t != null) - { - _timestamps = new float[impl.Count]; - fixed (float* pTarget = _timestamps) - { - for (int i = 0; i < impl.Count; i++) - { - pTarget[i] = t[i]; - } - } - } - else - { - _timestamps = Array.Empty(); - } - } - } - - [StructLayout(LayoutKind.Sequential)] - struct Impl - { - public IntPtr Text; - public IntPtr Tokens; - public IntPtr TokensArr; - public IntPtr Timestamps; - public int Count; - } - - private String _text; - public String Text => _text; - - private String[] _tokens; - public String[] Tokens => _tokens; - - private float[] _timestamps; - public float[] Timestamps => _timestamps; - } - - public class OnlineStream : IDisposable - { - public OnlineStream(IntPtr p) - { - _handle = new HandleRef(this, p); - } - - public void 
AcceptWaveform(int sampleRate, float[] samples) - { - AcceptWaveform(Handle, sampleRate, samples, samples.Length); - } - - public void InputFinished() - { - InputFinished(Handle); - } - - ~OnlineStream() - { - Cleanup(); - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - private void Cleanup() - { - DestroyOnlineStream(Handle); - - // Don't permit the handle to be used again. - _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - public IntPtr Handle => _handle.Handle; - - [DllImport(Dll.Filename)] - private static extern void DestroyOnlineStream(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n); - - [DllImport(Dll.Filename)] - private static extern void InputFinished(IntPtr handle); - } - - // please see - // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code - // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources - public class OnlineRecognizer : IDisposable - { - public OnlineRecognizer(OnlineRecognizerConfig config) - { - IntPtr h = CreateOnlineRecognizer(ref config); - _handle = new HandleRef(this, h); - } - - public OnlineStream CreateStream() - { - IntPtr p = CreateOnlineStream(_handle.Handle); - return new OnlineStream(p); - } - - /// Return true if the passed stream is ready for decoding. - public bool IsReady(OnlineStream stream) - { - return IsReady(_handle.Handle, stream.Handle) != 0; - } - - /// Return true if an endpoint is detected for this stream. - /// You probably need to invoke Reset(stream) when this method returns - /// true. 
- public bool IsEndpoint(OnlineStream stream) - { - return IsEndpoint(_handle.Handle, stream.Handle) != 0; - } - - /// You have to ensure that IsReady(stream) returns true before - /// you call this method - public void Decode(OnlineStream stream) - { - Decode(_handle.Handle, stream.Handle); - } - - // The caller should ensure all passed streams are ready for decoding. - public void Decode(IEnumerable streams) - { - IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); - Decode(_handle.Handle, ptrs, ptrs.Length); - } - - public OnlineRecognizerResult GetResult(OnlineStream stream) - { - IntPtr h = GetResult(_handle.Handle, stream.Handle); - OnlineRecognizerResult result = new OnlineRecognizerResult(h); - DestroyResult(h); - return result; - } - - /// When this method returns, IsEndpoint(stream) will return false. - public void Reset(OnlineStream stream) - { - Reset(_handle.Handle, stream.Handle); - } - - public void Dispose() - { - Cleanup(); - // Prevent the object from being placed on the - // finalization queue - System.GC.SuppressFinalize(this); - } - - ~OnlineRecognizer() - { - Cleanup(); - } - - private void Cleanup() - { - DestroyOnlineRecognizer(_handle.Handle); - - // Don't permit the handle to be used again. 
- _handle = new HandleRef(this, IntPtr.Zero); - } - - private HandleRef _handle; - - [DllImport(Dll.Filename)] - private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config); - - [DllImport(Dll.Filename)] - private static extern void DestroyOnlineRecognizer(IntPtr handle); - - [DllImport(Dll.Filename)] - private static extern IntPtr CreateOnlineStream(IntPtr handle); - - [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")] - private static extern int IsReady(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")] - private static extern void Decode(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")] - private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); - - [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")] - private static extern IntPtr GetResult(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")] - private static extern void DestroyResult(IntPtr result); - - [DllImport(Dll.Filename)] - private static extern void Reset(IntPtr handle, IntPtr stream); - - [DllImport(Dll.Filename)] - private static extern int IsEndpoint(IntPtr handle, IntPtr stream); - } -} diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh index ce729efb..3ce1a0fb 100755 --- a/scripts/dotnet/run.sh +++ b/scripts/dotnet/run.sh @@ -128,8 +128,7 @@ popd mkdir -p macos linux windows-x64 windows-x86 all -cp ./online.cs all -cp ./offline.cs all +cp ./*.cs all ./generate.py