Add C# API for speech enhancement GTCRN models (#1990)
This commit is contained in:
6
.github/scripts/test-dot-net.sh
vendored
6
.github/scripts/test-dot-net.sh
vendored
@@ -2,7 +2,11 @@
|
|||||||
|
|
||||||
cd dotnet-examples/
|
cd dotnet-examples/
|
||||||
|
|
||||||
cd ./kokoro-tts
|
cd ./speech-enhancement-gtcrn
|
||||||
|
./run.sh
|
||||||
|
ls -lh
|
||||||
|
|
||||||
|
cd ../kokoro-tts
|
||||||
./run-kokoro.sh
|
./run-kokoro.sh
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -105,6 +107,10 @@ Global
|
|||||||
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
|
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|||||||
45
dotnet-examples/speech-enhancement-gtcrn/Program.cs
Normal file
45
dotnet-examples/speech-enhancement-gtcrn/Program.cs
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
// This file shows how to use the speech enhancement API with GTCRN models.
|
||||||
|
//
|
||||||
|
// 1. Download a model from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
|
||||||
|
//
|
||||||
|
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
//
|
||||||
|
// 2. Download a test file
|
||||||
|
//
|
||||||
|
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||||
|
//
|
||||||
|
// 3. Now run it
|
||||||
|
//
|
||||||
|
// dotnet run
|
||||||
|
|
||||||
|
using SherpaOnnx;
|
||||||
|
|
||||||
|
/// <summary>
/// Console demo: denoises ./inp_16k.wav with the GTCRN model at
/// ./gtcrn_simple.onnx and saves the result to ./enhanced-16k.wav.
/// </summary>
class OfflineSpeechEnhancementDemo
{
  static void Main(string[] args)
  {
    var config = new OfflineSpeechDenoiserConfig();
    config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
    config.Model.Debug = 1;
    config.Model.NumThreads = 1;

    // OfflineSpeechDenoiser implements IDisposable; release it at the end of
    // Main instead of leaving the cleanup to the finalizer.
    using var sd = new OfflineSpeechDenoiser(config);

    WaveReader waveReader = new WaveReader("./inp_16k.wav");
    var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);

    try
    {
      var outputFilename = "./enhanced-16k.wav";
      var ok = denoisedAudio.SaveToWaveFile(outputFilename);

      if (ok)
      {
        Console.WriteLine($"Wrote to {outputFilename} succeeded!");
      }
      else
      {
        Console.WriteLine($"Failed to write {outputFilename}");
      }
    }
    finally
    {
      // Call Dispose() explicitly so the denoised audio is released even if
      // saving throws.
      denoisedAudio.Dispose();
    }
  }
}
|
||||||
12
dotnet-examples/speech-enhancement-gtcrn/run.sh
Executable file
12
dotnet-examples/speech-enhancement-gtcrn/run.sh
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
# Fetch the GTCRN model and the sample wave file if they are not already
# present in the current directory, then run the demo.
set -ex

# Model file: only download when it is missing.
[ -f ./gtcrn_simple.onnx ] || \
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx

# Test input: only download when it is missing.
[ -f ./inp_16k.wav ] || \
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

dotnet run
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<RootNamespace>speech_enhancement_gtcrn</RootNamespace>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Common\Common.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
94
scripts/dotnet/DenoisedAudio.cs
Normal file
94
scripts/dotnet/DenoisedAudio.cs
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
using System;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
{
    /// <summary>
    /// Managed wrapper around a native denoised-audio object. The native
    /// pointer is read as an <c>Impl</c> struct (samples pointer, sample
    /// count, sample rate) and is released through
    /// <c>SherpaOnnxDestroyDenoisedAudio</c> on dispose or finalization.
    /// </summary>
    public class DenoisedAudio : IDisposable
    {
        /// <summary>Wraps the native pointer <paramref name="p"/>.</summary>
        /// <param name="p">Pointer to the native denoised-audio object.</param>
        public DenoisedAudio(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        /// <summary>
        /// Writes the samples to <paramref name="filename"/> via the native
        /// <c>SherpaOnnxWriteWave</c> function.
        /// </summary>
        /// <param name="filename">Output path; passed to native code as NUL-terminated UTF-8.</param>
        /// <returns><c>true</c> when the native call returns 1; otherwise <c>false</c>.</returns>
        public bool SaveToWaveFile(String filename)
        {
            Impl impl = ReadImpl();

            // A new byte[] is zero-filled, so the extra trailing byte already
            // is the NUL terminator.
            byte[] utf8FilenameWithNull = new byte[Encoding.UTF8.GetByteCount(filename) + 1];
            Encoding.UTF8.GetBytes(filename, 0, filename.Length, utf8FilenameWithNull, 0);

            int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);

            // impl.Samples is a raw pointer; keep this object reachable so
            // the finalizer cannot release the native object during the call.
            GC.KeepAlive(this);
            return status == 1;
        }

        ~DenoisedAudio()
        {
            Cleanup();
        }

        /// <summary>
        /// Releases the native object. Calling it more than once is a no-op.
        /// </summary>
        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        private void Cleanup()
        {
            IntPtr h = Handle;
            if (h == IntPtr.Zero)
            {
                // Nothing to release (already cleaned up, or never had a handle).
                return;
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);

            SherpaOnnxDestroyDenoisedAudio(h);
        }

        // Field order is significant: the struct is marshalled sequentially
        // from the native pointer.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Samples;
            public int NumSamples;
            public int SampleRate;
        }

        // Reads the current native struct behind Handle.
        private Impl ReadImpl()
        {
            return (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
        }

        private HandleRef _handle;

        /// <summary>The wrapped native pointer (zero after cleanup).</summary>
        public IntPtr Handle => _handle.Handle;

        /// <summary>Number of samples reported by the native object.</summary>
        public int NumSamples
        {
            get
            {
                int n = ReadImpl().NumSamples;
                GC.KeepAlive(this);
                return n;
            }
        }

        /// <summary>Sample rate reported by the native object.</summary>
        public int SampleRate
        {
            get
            {
                int rate = ReadImpl().SampleRate;
                GC.KeepAlive(this);
                return rate;
            }
        }

        /// <summary>
        /// A managed copy of the samples; a new array is allocated on every access.
        /// </summary>
        public float[] Samples
        {
            get
            {
                Impl impl = ReadImpl();

                float[] samples = new float[impl.NumSamples];
                Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);

                // Keep the native buffer alive until the copy has finished.
                GC.KeepAlive(this);
                return samples;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
    }
}
|
||||||
64
scripts/dotnet/OfflineSpeechDenoiser.cs
Normal file
64
scripts/dotnet/OfflineSpeechDenoiser.cs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
using System;
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
{
    /// <summary>
    /// Managed wrapper around a native offline speech denoiser created by
    /// <c>SherpaOnnxCreateOfflineSpeechDenoiser</c>. The native object is
    /// destroyed on dispose or finalization.
    /// </summary>
    public class OfflineSpeechDenoiser: IDisposable
    {
        /// <summary>Creates the native denoiser from <paramref name="config"/>.</summary>
        /// <param name="config">Configuration passed by reference to native code.</param>
        public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
            _handle = new HandleRef(this, h);
        }

        /// <summary>
        /// Runs the denoiser on <paramref name="samples"/> and wraps the
        /// native result in a <see cref="DenoisedAudio"/>.
        /// </summary>
        /// <param name="samples">Input audio samples.</param>
        /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
        /// <returns>A wrapper around the pointer returned by native code.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
        public DenoisedAudio Run(float[] samples, int sampleRate)
        {
            if (samples == null)
            {
                throw new ArgumentNullException(nameof(samples));
            }

            IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);

            // Only the raw pointer is passed to native code; keep this object
            // reachable so its finalizer cannot run during the call.
            GC.KeepAlive(this);
            return new DenoisedAudio(p);
        }

        /// <summary>
        /// Releases the native denoiser. Calling it more than once is a no-op.
        /// </summary>
        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineSpeechDenoiser()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            IntPtr h = _handle.Handle;
            if (h == IntPtr.Zero)
            {
                // Nothing to release (already cleaned up, or never had a handle).
                return;
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);

            SherpaOnnxDestroyOfflineSpeechDenoiser(h);
        }

        private HandleRef _handle;

        /// <summary>Sample rate reported by the native denoiser.</summary>
        public int SampleRate
        {
            get
            {
                int rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(_handle.Handle);
                GC.KeepAlive(this);
                return rate;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
    }
}
|
||||||
16
scripts/dotnet/OfflineSpeechDenoiserConfig.cs
Normal file
16
scripts/dotnet/OfflineSpeechDenoiserConfig.cs
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
{
    /// <summary>
    /// Top-level configuration passed by reference to the native
    /// offline speech denoiser. Laid out sequentially for marshalling.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserConfig
    {
        /// <summary>Model settings.</summary>
        public OfflineSpeechDenoiserModelConfig Model;

        /// <summary>Initializes <see cref="Model"/> with its own defaults.</summary>
        public OfflineSpeechDenoiserConfig() => Model = new OfflineSpeechDenoiserModelConfig();
    }
}
|
||||||
17
scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs
Normal file
17
scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
{
    /// <summary>
    /// GTCRN-specific model settings. Laid out sequentially for marshalling.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserGtcrnModelConfig
    {
        /// <summary>
        /// Model path (the demo sets it to a .onnx file); marshalled as an
        /// LPStr. Defaults to the empty string.
        /// </summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        /// <summary>Initializes <see cref="Model"/> to the empty string.</summary>
        public OfflineSpeechDenoiserGtcrnModelConfig() => Model = string.Empty;
    }
}
|
||||||
27
scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs
Normal file
27
scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
{
    /// <summary>
    /// Model-level settings for the offline speech denoiser. Laid out
    /// sequentially for marshalling, so the field order must not change.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserModelConfig
    {
        /// <summary>GTCRN model settings.</summary>
        public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;

        /// <summary>Thread count; defaults to 1.</summary>
        public int NumThreads;

        /// <summary>Debug flag; defaults to 0.</summary>
        public int Debug;

        /// <summary>Provider name, marshalled as an LPStr; defaults to "cpu".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        /// <summary>
        /// Sets the defaults: a fresh GTCRN config, one thread, debug off
        /// and the "cpu" provider.
        /// </summary>
        public OfflineSpeechDenoiserModelConfig()
        {
            Provider = "cpu";
            Debug = 0;
            NumThreads = 1;
            Gtcrn = new OfflineSpeechDenoiserGtcrnModelConfig();
        }
    }
}
|
||||||
Reference in New Issue
Block a user