From d3e27d5e21a5f9c862a8c09aaf784da0b6feb727 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Tue, 11 Mar 2025 18:58:17 +0800
Subject: [PATCH] Add C# API for speech enhancement GTCRN models (#1990)

---
 .github/scripts/test-dot-net.sh               |  6 +-
 dotnet-examples/sherpa-onnx.sln               |  6 ++
 .../speech-enhancement-gtcrn/Program.cs       | 45 +++++++++
 .../speech-enhancement-gtcrn/run.sh           | 12 +++
 .../speech-enhancement-gtcrn.csproj           | 15 +++
 scripts/dotnet/DenoisedAudio.cs               | 94 +++++++++++++++++++
 scripts/dotnet/OfflineSpeechDenoiser.cs       | 64 +++++++++++++
 scripts/dotnet/OfflineSpeechDenoiserConfig.cs | 16 ++++
 .../OfflineSpeechDenoiserGtcrnModelConfig.cs  | 17 ++++
 .../OfflineSpeechDenoiserModelConfig.cs       | 27 ++++++
 10 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 dotnet-examples/speech-enhancement-gtcrn/Program.cs
 create mode 100755 dotnet-examples/speech-enhancement-gtcrn/run.sh
 create mode 100644 dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj
 create mode 100644 scripts/dotnet/DenoisedAudio.cs
 create mode 100644 scripts/dotnet/OfflineSpeechDenoiser.cs
 create mode 100644 scripts/dotnet/OfflineSpeechDenoiserConfig.cs
 create mode 100644 scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs
 create mode 100644 scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs

diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
index 9ee8b9cc..c67db9d1 100755
--- a/.github/scripts/test-dot-net.sh
+++ b/.github/scripts/test-dot-net.sh
@@ -2,7 +2,11 @@
 
 cd dotnet-examples/
 
-cd ./kokoro-tts
+cd ./speech-enhancement-gtcrn
+./run.sh
+ls -lh
+
+cd ../kokoro-tts
 ./run-kokoro.sh
 ls -lh
 
diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln
index 404c4976..0c8e24ab 100644
--- a/dotnet-examples/sherpa-onnx.sln
+++ b/dotnet-examples/sherpa-onnx.sln
@@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -105,6 +107,10 @@ Global
 		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/dotnet-examples/speech-enhancement-gtcrn/Program.cs b/dotnet-examples/speech-enhancement-gtcrn/Program.cs
new file mode 100644
index 00000000..4553a6f1
--- /dev/null
+++ b/dotnet-examples/speech-enhancement-gtcrn/Program.cs
@@ -0,0 +1,58 @@
+﻿// Copyright (c)  2025  Xiaomi Corporation
+//
+// This file shows how to use speech enhancement API with GTCRN models.
+//
+// 1. Download a model from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
+//
+// 2. Download a test file
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
+//
+// 3. Now run it
+//
+// dotnet run
+
+using SherpaOnnx;
+
+class OfflineSpeechEnhancementDemo
+{
+  // Denoises ./inp_16k.wav with ./gtcrn_simple.onnx and writes the result to
+  // ./enhanced-16k.wav.
+  static void Main(string[] args)
+  {
+    var config = new OfflineSpeechDenoiserConfig();
+    config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
+    config.Model.Debug = 1;
+    config.Model.NumThreads = 1;
+
+    // OfflineSpeechDenoiser wraps a native handle; dispose it deterministically
+    // instead of waiting for the finalizer.
+    using var sd = new OfflineSpeechDenoiser(config);
+
+    WaveReader waveReader = new WaveReader("./inp_16k.wav");
+    var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);
+
+    try
+    {
+      var outputFilename = "./enhanced-16k.wav";
+      var ok = denoisedAudio.SaveToWaveFile(outputFilename);
+
+      if (ok)
+      {
+        Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+      }
+      else
+      {
+        Console.WriteLine($"Failed to write {outputFilename}");
+      }
+    }
+    finally
+    {
+      // DenoisedAudio also wraps a native object; release it explicitly.
+      denoisedAudio.Dispose();
+    }
+  }
+}
diff --git a/dotnet-examples/speech-enhancement-gtcrn/run.sh b/dotnet-examples/speech-enhancement-gtcrn/run.sh
new file mode 100755
index 00000000..788525cd
--- /dev/null
+++ b/dotnet-examples/speech-enhancement-gtcrn/run.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Downloads the GTCRN model and a test wave file into the current directory
+# (unless they already exist) and then runs the C# example with dotnet.
+set -ex
+
+# -f makes curl exit with an error on an HTTP failure instead of saving the
+# server's error page under the expected filename; such a file would pass
+# the existence checks below on every later run.
+if [ ! -f ./gtcrn_simple.onnx ]; then
+  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
+fi
+
+if [ ! -f ./inp_16k.wav ]; then
+  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
+fi
+
+dotnet run
diff --git a/dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj b/dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj
new file mode 100644
index 00000000..a7adcc5e
--- /dev/null
+++ b/dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj
@@ -0,0 +1,19 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <!-- Console example for the GTCRN speech enhancement API. ImplicitUsings
+       is required: Program.cs uses Console without a using System directive. -->
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <RootNamespace>speech_enhancement_gtcrn</RootNamespace>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <!-- NOTE(review): Common.csproj presumably supplies the SherpaOnnx types
+       used by Program.cs; confirm against that project. -->
+  <ItemGroup>
+    <ProjectReference Include="..\Common\Common.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/scripts/dotnet/DenoisedAudio.cs b/scripts/dotnet/DenoisedAudio.cs
new file mode 100644
index 00000000..4ec4ecdd
--- /dev/null
+++ b/scripts/dotnet/DenoisedAudio.cs
@@ -0,0 +1,135 @@
+﻿/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+using System;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Managed wrapper around a native denoised-audio object. The native object
+    /// is released with <c>SherpaOnnxDestroyDenoisedAudio</c> when this wrapper
+    /// is disposed or finalized.
+    /// </summary>
+    public class DenoisedAudio : IDisposable
+    {
+        /// <summary>Wraps the native pointer <paramref name="p"/>.</summary>
+        /// <param name="p">Pointer to the native denoised-audio struct.</param>
+        public DenoisedAudio(IntPtr p)
+        {
+            _handle = new HandleRef(this, p);
+        }
+
+        /// <summary>
+        /// Writes the samples to a wave file using the native
+        /// <c>SherpaOnnxWriteWave</c> function.
+        /// </summary>
+        /// <param name="filename">Output path; passed to native code as
+        /// null-terminated UTF-8.</param>
+        /// <returns>true if the native call returned 1; otherwise false.</returns>
+        public bool SaveToWaveFile(String filename)
+        {
+            Impl impl = ReadImpl();
+            byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
+            byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1]; // +1 for null terminator
+            Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
+            utf8FilenameWithNull[utf8Filename.Length] = 0; // Null terminator
+            int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);
+
+            // Only raw pointers are passed to native code, so the GC could
+            // otherwise finalize this object (destroying the native audio)
+            // while the call is still running.
+            GC.KeepAlive(this);
+            return status == 1;
+        }
+
+        ~DenoisedAudio()
+        {
+            Cleanup();
+        }
+
+        /// <summary>
+        /// Releases the native object. Calling this more than once is a no-op.
+        /// </summary>
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        private void Cleanup()
+        {
+            IntPtr p = Handle;
+            if (p == IntPtr.Zero)
+            {
+                // Already released, or constructed from a null pointer.
+                return;
+            }
+
+            // Don't permit the handle to be used (or freed) again.
+            _handle = new HandleRef(this, IntPtr.Zero);
+            SherpaOnnxDestroyDenoisedAudio(p);
+        }
+
+        // Managed view of the native struct that Handle points to.
+        [StructLayout(LayoutKind.Sequential)]
+        struct Impl
+        {
+            public IntPtr Samples;
+            public int NumSamples;
+            public int SampleRate;
+        }
+
+        private HandleRef _handle;
+
+        /// <summary>The raw native pointer; IntPtr.Zero after disposal.</summary>
+        public IntPtr Handle => _handle.Handle;
+
+        /// <summary>Number of samples reported by the native struct.</summary>
+        public int NumSamples
+        {
+            get
+            {
+                return ReadImpl().NumSamples;
+            }
+        }
+
+        /// <summary>Sample rate reported by the native struct.</summary>
+        public int SampleRate
+        {
+            get
+            {
+                return ReadImpl().SampleRate;
+            }
+        }
+
+        /// <summary>A managed copy of the native samples.</summary>
+        public float[] Samples
+        {
+            get
+            {
+                Impl impl = ReadImpl();
+
+                float[] samples = new float[impl.NumSamples];
+                Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
+
+                // Keep the native buffer alive until the copy has finished.
+                GC.KeepAlive(this);
+                return samples;
+            }
+        }
+
+        // Reads the native struct's fields through the current handle.
+        private Impl ReadImpl()
+        {
+            return (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
+        }
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
+    }
+}
diff --git a/scripts/dotnet/OfflineSpeechDenoiser.cs b/scripts/dotnet/OfflineSpeechDenoiser.cs
new file mode 100644
index 00000000..429e2924
--- /dev/null
+++ b/scripts/dotnet/OfflineSpeechDenoiser.cs
@@ -0,0 +1,97 @@
+/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System;
+using System.Runtime.InteropServices;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Managed wrapper around a native offline speech denoiser created from an
+    /// <see cref="OfflineSpeechDenoiserConfig"/>. The native object is destroyed
+    /// when this wrapper is disposed or finalized.
+    /// </summary>
+    public class OfflineSpeechDenoiser: IDisposable
+    {
+        /// <summary>Creates the native denoiser from <paramref name="config"/>.</summary>
+        public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
+        {
+            IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
+            _handle = new HandleRef(this, h);
+        }
+
+        /// <summary>Runs the native denoiser on <paramref name="samples"/>.</summary>
+        /// <param name="samples">Input audio samples.</param>
+        /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
+        /// <returns>A <see cref="DenoisedAudio"/> wrapping the native result.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
+        public DenoisedAudio Run(float[] samples, int sampleRate)
+        {
+            if (samples == null)
+            {
+                throw new ArgumentNullException(nameof(samples));
+            }
+
+            IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);
+
+            // Only the raw pointer is passed to native code, so the GC could
+            // otherwise finalize this object while the call is still running.
+            GC.KeepAlive(this);
+            return new DenoisedAudio(p);
+        }
+
+        /// <summary>
+        /// Destroys the native denoiser. Calling this more than once is a no-op.
+        /// </summary>
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        ~OfflineSpeechDenoiser()
+        {
+            Cleanup();
+        }
+
+        private void Cleanup()
+        {
+            IntPtr h = _handle.Handle;
+            if (h == IntPtr.Zero)
+            {
+                // Already destroyed, or the handle was never non-null.
+                return;
+            }
+
+            // Don't permit the handle to be used (or destroyed) again.
+            _handle = new HandleRef(this, IntPtr.Zero);
+            SherpaOnnxDestroyOfflineSpeechDenoiser(h);
+        }
+
+        private HandleRef _handle;
+
+        /// <summary>Sample rate reported by the native denoiser.</summary>
+        public int SampleRate
+        {
+            get
+            {
+                int sampleRate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(_handle.Handle);
+                GC.KeepAlive(this);
+                return sampleRate;
+            }
+        }
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
+    }
+}
diff --git a/scripts/dotnet/OfflineSpeechDenoiserConfig.cs b/scripts/dotnet/OfflineSpeechDenoiserConfig.cs
new file mode 100644
index 00000000..546fe9c8
--- /dev/null
+++ b/scripts/dotnet/OfflineSpeechDenoiserConfig.cs
@@ -0,0 +1,23 @@
+/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Runtime.InteropServices;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Top-level configuration for the offline speech denoiser, laid out
+    /// sequentially for marshalling.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct OfflineSpeechDenoiserConfig
+    {
+        /// <summary>Model settings.</summary>
+        public OfflineSpeechDenoiserModelConfig Model;
+
+        /// <summary>Creates a config whose <see cref="Model"/> is default-constructed.</summary>
+        public OfflineSpeechDenoiserConfig()
+        {
+            this.Model = new OfflineSpeechDenoiserModelConfig();
+        }
+    }
+}
diff --git a/scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs b/scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs
new file mode 100644
index 00000000..8a815d0b
--- /dev/null
+++ b/scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs
@@ -0,0 +1,23 @@
+/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Runtime.InteropServices;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Settings for a GTCRN speech enhancement model.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct OfflineSpeechDenoiserGtcrnModelConfig
+    {
+        /// <summary>Path to the GTCRN model file; marshalled as LPStr.</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Model;
+
+        /// <summary>Creates a config with an empty model path.</summary>
+        public OfflineSpeechDenoiserGtcrnModelConfig()
+        {
+            this.Model = string.Empty;
+        }
+    }
+}
diff --git a/scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs b/scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs
new file mode 100644
index 00000000..40d4d101
--- /dev/null
+++ b/scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs
@@ -0,0 +1,36 @@
+/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Runtime.InteropServices;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Model-level settings for the offline speech denoiser.
+    /// NOTE(review): field order presumably mirrors the native struct; do not reorder.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct OfflineSpeechDenoiserModelConfig
+    {
+        /// <summary>GTCRN model settings.</summary>
+        public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;
+
+        /// <summary>Thread count setting; defaults to 1.</summary>
+        public int NumThreads;
+
+        /// <summary>Debug flag; defaults to 0.</summary>
+        public int Debug;
+
+        /// <summary>Provider name; defaults to "cpu". Marshalled as LPStr.</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Provider;
+
+        /// <summary>Creates a config populated with the defaults noted on each field.</summary>
+        public OfflineSpeechDenoiserModelConfig()
+        {
+            this.Provider = "cpu";
+            this.Debug = 0;
+            this.NumThreads = 1;
+            this.Gtcrn = new OfflineSpeechDenoiserGtcrnModelConfig();
+        }
+    }
+}