Add C# API for speech enhancement GTCRN models (#1990)

2025-03-11 18:58:17 +08:00
parent c12d1d88c0
commit d3e27d5e21
10 changed files with 301 additions and 1 deletions
--- a/dotnet-examples/sherpa-onnx.sln
+++ b/dotnet-examples/sherpa-onnx.sln
@@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -105,6 +107,10 @@ Global
 		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
--- a/dotnet-examples/speech-enhancement-gtcrn/Program.cs
+++ b/dotnet-examples/speech-enhancement-gtcrn/Program.cs
@@ -0,0 +1,45 @@
+// Copyright (c)  2025  Xiaomi Corporation
+//
+// This file shows how to use the speech enhancement API with GTCRN models.
+//
+// 1. Download a model from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
+//
+// 2. Download a test file
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
+//
+// 3. Now run it
+//
+// dotnet run
+
+using SherpaOnnx;
+
+/// <summary>
+/// Example: offline speech enhancement with a GTCRN model.
+/// </summary>
+class OfflineSpeechEnhancementDemo
+{
+  // Denoise ./inp_16k.wav and save the result as ./enhanced-16k.wav.
+  static void Main(string[] args)
+  {
+    // Point the denoiser at the GTCRN model file; Debug = 1, one thread.
+    var denoiserConfig = new OfflineSpeechDenoiserConfig();
+    denoiserConfig.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
+    denoiserConfig.Model.Debug = 1;
+    denoiserConfig.Model.NumThreads = 1;
+    var denoiser = new OfflineSpeechDenoiser(denoiserConfig);
+
+    var inputWave = new WaveReader("./inp_16k.wav");
+    var enhanced = denoiser.Run(inputWave.Samples, inputWave.SampleRate);
+
+    // Save the result and report whether the write succeeded.
+    var outputFilename = "./enhanced-16k.wav";
+    var statusMessage = enhanced.SaveToWaveFile(outputFilename)
+      ? $"Wrote to {outputFilename} succeeded!"
+      : $"Failed to write {outputFilename}";
+    Console.WriteLine(statusMessage);
+  }
+}
--- a/dotnet-examples/speech-enhancement-gtcrn/run.sh
+++ b/dotnet-examples/speech-enhancement-gtcrn/run.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+set -ex
+# Fetch the GTCRN model if missing; -f fails on HTTP errors (no error page saved).
+if [ ! -f ./gtcrn_simple.onnx ]; then
+  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
+fi
+# Fetch the sample input wave file if missing, with the same fail-on-error rule.
+if [ ! -f ./inp_16k.wav ]; then
+  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
+fi
+# Build and run the example project in the current directory.
+dotnet run
--- a/dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj
+++ b/dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj
@@ -0,0 +1,15 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <RootNamespace>speech_enhancement_gtcrn</RootNamespace>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\Common\Common.csproj" />
+  </ItemGroup>
+
+</Project>