Add C# API for speech enhancement GTCRN models (#1990)

This commit is contained in:
Fangjun Kuang
2025-03-11 18:58:17 +08:00
committed by GitHub
parent c12d1d88c0
commit d3e27d5e21
10 changed files with 301 additions and 1 deletions

View File

@@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -105,6 +107,10 @@ Global
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@@ -0,0 +1,45 @@
// Copyright (c) 2025 Xiaomi Corporation
//
// This file shows how to use speech enhancement API with GTCRN models.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
//
// 2. Download a test file
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
//
// 3. Now run it
//
// dotnet run
using SherpaOnnx;
class OfflineSpeechEnhancementDemo
{
static void Main(string[] args)
{
var config = new OfflineSpeechDenoiserConfig();
config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
config.Model.Debug = 1;
config.Model.NumThreads = 1;
var sd = new OfflineSpeechDenoiser(config);
WaveReader waveReader = new WaveReader("./inp_16k.wav");
var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);
var outputFilename = "./enhanced-16k.wav";
var ok = denoisedAudio.SaveToWaveFile(outputFilename);
if (ok)
{
Console.WriteLine($"Wrote to {outputFilename} succeeded!");
}
else
{
Console.WriteLine($"Failed to write {outputFilename}");
}
}
}

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -ex
if [ ! -f ./gtcrn_simple.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi
if [ ! -f ./inp_16k.wav ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi
dotnet run

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>speech_enhancement_gtcrn</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>
</Project>