Wrap VAD APIs to C# (#946)
This commit is contained in:
@@ -17,7 +17,6 @@ using System;
|
||||
|
||||
class OfflinePunctuationDemo
|
||||
{
|
||||
|
||||
static void Main(string[] args)
|
||||
{
|
||||
var config = new OfflinePunctuationConfig();
|
||||
@@ -42,4 +41,3 @@ class OfflinePunctuationDemo
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "s
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -66,5 +68,9 @@ Global
|
||||
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
||||
62
dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
Normal file
62
dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to use a silero_vad model with a non-streaming Paraformer
|
||||
// for speech recognition.
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
/// <summary>
/// Example program: feeds a wave file to a silero_vad voice activity detector
/// window by window and decodes every detected speech segment with a
/// non-streaming Paraformer recognizer, printing "start--end: text" per segment.
/// </summary>
class VadNonStreamingAsrParaformer
{
  /// <summary>
  /// Program entry point. Model and wave file paths are hard-coded relative to
  /// the current working directory.
  /// </summary>
  /// <param name="args">Command line arguments (unused).</param>
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    OfflineRecognizerConfig config = new OfflineRecognizerConfig();
    config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
    config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
    config.ModelConfig.Debug = 0;
    OfflineRecognizer recognizer = new OfflineRecognizer(config);

    VadModelConfig vadModelConfig = new VadModelConfig();
    vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
    vadModelConfig.Debug = 0;

    // NOTE(review): the second argument is presumably the detector's buffer
    // size in seconds -- confirm against the VoiceActivityDetector binding.
    VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);

    string testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.Samples.Length;
    int windowSize = vadModelConfig.SileroVad.WindowSize;
    int sampleRate = vadModelConfig.SampleRate;

    // Only whole windows are fed to the detector; the trailing
    // numSamples % windowSize samples are not processed.
    int numIter = numSamples / windowSize;

    for (int i = 0; i != numIter; ++i)
    {
      int start = i * windowSize;
      float[] samples = new float[windowSize];
      Array.Copy(reader.Samples, start, samples, 0, windowSize);
      vad.AcceptWaveform(samples);

      if (vad.IsSpeechDetected())
      {
        DecodeQueuedSegments(vad, recognizer, sampleRate);
      }
    }

    // Segments that were queued while IsSpeechDetected() was false on the last
    // iterations would otherwise never be decoded, so drain the queue once
    // more after all input has been consumed.
    // NOTE(review): if the VoiceActivityDetector binding offers a way to flush
    // speech that is still in progress at end of input, it should be called
    // before this final drain -- TODO confirm.
    DecodeQueuedSegments(vad, recognizer, sampleRate);
  }

  /// <summary>
  /// Decodes and removes every segment currently queued in the detector and
  /// prints each non-empty recognition result with its start and end time.
  /// </summary>
  /// <param name="vad">Detector whose queued segments are consumed.</param>
  /// <param name="recognizer">Recognizer used to decode each segment.</param>
  /// <param name="sampleRate">
  /// Sample rate used both to convert sample counts to seconds and as the
  /// rate passed to the recognizer stream.
  /// </param>
  static void DecodeQueuedSegments(VoiceActivityDetector vad, OfflineRecognizer recognizer, int sampleRate)
  {
    while (!vad.IsEmpty())
    {
      SpeechSegment segment = vad.Front();

      // Convert sample offsets/counts to seconds.
      float startTime = segment.Start / (float)sampleRate;
      float duration = segment.Samples.Length / (float)sampleRate;

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sampleRate, segment.Samples);
      recognizer.Decode(stream);
      string text = stream.Result.Text;

      if (!string.IsNullOrEmpty(text))
      {
        Console.WriteLine($"{startTime:0.00}--{startTime + duration:0.00}: {text}");
      }

      vad.Pop();
    }
  }
}
|
||||
|
||||
1
dotnet-examples/vad-non-streaming-asr-paraformer/WaveReader.cs
Symbolic link
1
dotnet-examples/vad-non-streaming-asr-paraformer/WaveReader.cs
Symbolic link
@@ -0,0 +1 @@
|
||||
../online-decode-files/WaveReader.cs
|
||||
20
dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
Executable file
20
dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the silero_vad model, the test wave file and the Paraformer model
# into the current directory (each only if it is not already present) and then
# runs the example with `dotnet run`.

# -e: stop at the first failing command; -x: echo every command.
set -ex

# curl is invoked with -f so that an HTTP error makes curl fail instead of
# saving the server's error page under the expected filename; such a bogus file
# would satisfy the existence checks below on every later run.

if [ ! -f ./silero_vad.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  # The archive is no longer needed once it has been extracted.
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

dotnet run
|
||||
@@ -0,0 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Build settings for the console example. -->
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <OutputType>Exe</OutputType>
    <RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- The sherpa-onnx package; Version="*" is a floating version. -->
  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
|
||||
Reference in New Issue
Block a user