Wrap VAD APIs to C# (#946)

This commit is contained in:
Fangjun Kuang
2024-05-30 18:29:38 +08:00
committed by GitHub
parent a99c7cb35c
commit 0196f1d546
17 changed files with 474 additions and 10 deletions

View File

@@ -17,7 +17,6 @@ using System;
class OfflinePunctuationDemo
{
static void Main(string[] args)
{
var config = new OfflinePunctuationConfig();
@@ -42,4 +41,3 @@ class OfflinePunctuationDemo
}
}
}

View File

@@ -21,6 +21,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "s
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -66,5 +68,9 @@ Global
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,62 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
using SherpaOnnx;
using System.Collections.Generic;
using System;
class VadNonStreamingAsrParaformer
{
  /// <summary>
  /// Feeds a wave file to a silero_vad voice activity detector one window at
  /// a time and decodes every speech segment it produces with a non-streaming
  /// Paraformer recognizer, printing "start--end: text" for each non-empty
  /// result.
  /// </summary>
  /// <param name="args">Command line arguments; not used.</param>
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    OfflineRecognizerConfig config = new OfflineRecognizerConfig();
    config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
    config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
    config.ModelConfig.Debug = 0;
    OfflineRecognizer recognizer = new OfflineRecognizer(config);

    VadModelConfig vadModelConfig = new VadModelConfig();
    vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
    vadModelConfig.Debug = 0;
    // NOTE(review): the second argument is presumably the detector's buffer
    // size in seconds -- confirm against VoiceActivityDetector.
    VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);

    string testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.Samples.Length;
    int windowSize = vadModelConfig.SileroVad.WindowSize;
    int sampleRate = vadModelConfig.SampleRate;

    // Integer division: a trailing partial window (fewer than windowSize
    // samples) is not fed to the detector.
    int numIter = numSamples / windowSize;

    for (int i = 0; i != numIter; ++i) {
      int start = i * windowSize;
      float[] samples = new float[windowSize];
      Array.Copy(reader.Samples, start, samples, 0, windowSize);
      vad.AcceptWaveform(samples);

      // Drain after every window instead of only while IsSpeechDetected() is
      // true. With the old gate, a segment that was still queued while the
      // detector was not reporting speech (in particular the last segment of
      // the file) was never decoded.
      DecodeQueuedSegments(vad, recognizer, sampleRate);
    }
  }

  /// <summary>
  /// Pops every segment currently queued in <paramref name="vad"/>, decodes
  /// it with <paramref name="recognizer"/> and prints its time range and text.
  /// Segments whose recognized text is null or empty are popped silently.
  /// </summary>
  /// <param name="vad">Detector whose segment queue is drained.</param>
  /// <param name="recognizer">Recognizer used to decode each segment.</param>
  /// <param name="sampleRate">Sample rate used to convert sample counts to seconds.</param>
  static void DecodeQueuedSegments(VoiceActivityDetector vad, OfflineRecognizer recognizer, int sampleRate)
  {
    while (!vad.IsEmpty()) {
      SpeechSegment segment = vad.Front();
      float startTime = segment.Start / (float)sampleRate;
      float duration = segment.Samples.Length / (float)sampleRate;

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sampleRate, segment.Samples);
      recognizer.Decode(stream);

      String text = stream.Result.Text;
      if (!String.IsNullOrEmpty(text)) {
        Console.WriteLine($"{startTime:0.00}--{startTime + duration:0.00}: {text}");
      }

      vad.Pop();
    }
  }
}

View File

@@ -0,0 +1 @@
../online-decode-files/WaveReader.cs

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Downloads the silero_vad model, the test wave file and the Paraformer model
# into the current directory when they are missing, then runs the demo.

set -ex

readonly base_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models

# Download release asset $1 into the current directory under its own name.
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page under the asset name; without it the existence checks below would
# accept that bogus file on the next run and skip the download.
download() {
  curl -fSL -O "$base_url/$1"
}

if [ ! -f ./silero_vad.onnx ]; then
  download silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  download lei-jun-test.wav
fi

# tokens.txt is used as the marker that the archive has been extracted.
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  download sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

dotnet run

View File

@@ -0,0 +1,15 @@
<!-- Project file for the vad-non-streaming-asr-paraformer console demo. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build an executable targeting .NET 6. -->
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Version="*" is a floating version, so the resolved package version is not pinned. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>