Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)

This commit is contained in:
Michael Lamothe
2025-01-04 19:39:06 +11:00
committed by GitHub
parent bf3330c906
commit 8a60985363
29 changed files with 354 additions and 404 deletions

View File

@@ -3,8 +3,6 @@
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
using SherpaOnnx;
using System.Collections.Generic;
using System;
class VadNonStreamingAsrParaformer
{
@@ -12,45 +10,49 @@ class VadNonStreamingAsrParaformer
{
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
OfflineRecognizerConfig config = new OfflineRecognizerConfig();
var config = new OfflineRecognizerConfig();
config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
config.ModelConfig.Debug = 0;
OfflineRecognizer recognizer = new OfflineRecognizer(config);
var recognizer = new OfflineRecognizer(config);
VadModelConfig vadModelConfig = new VadModelConfig();
var vadModelConfig = new VadModelConfig();
vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
vadModelConfig.Debug = 0;
VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);
var vad = new VoiceActivityDetector(vadModelConfig, 60);
string testWaveFilename = "./lei-jun-test.wav";
WaveReader reader = new WaveReader(testWaveFilename);
var testWaveFilename = "./lei-jun-test.wav";
var reader = new WaveReader(testWaveFilename);
int numSamples = reader.Samples.Length;
int windowSize = vadModelConfig.SileroVad.WindowSize;
int sampleRate = vadModelConfig.SampleRate;
int numIter = numSamples / windowSize;
for (int i = 0; i != numIter; ++i) {
for (int i = 0; i != numIter; ++i)
{
int start = i * windowSize;
float[] samples = new float[windowSize];
var samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize);
vad.AcceptWaveform(samples);
if (vad.IsSpeechDetected()) {
while (!vad.IsEmpty()) {
if (vad.IsSpeechDetected())
{
while (!vad.IsEmpty())
{
SpeechSegment segment = vad.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
var startTime = segment.Start / (float)sampleRate;
var duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
String text = stream.Result.Text;
var text = stream.Result.Text;
if (!String.IsNullOrEmpty(text)) {
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
String.Format("{0:0.00}", startTime+duration), text);
if (!string.IsNullOrEmpty(text))
{
Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
string.Format("{0:0.00}", startTime + duration), text);
}
vad.Pop();
@@ -60,19 +62,21 @@ class VadNonStreamingAsrParaformer
vad.Flush();
while (!vad.IsEmpty()) {
SpeechSegment segment = vad.Front();
while (!vad.IsEmpty())
{
var segment = vad.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = recognizer.CreateStream();
var stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
String text = stream.Result.Text;
var text = stream.Result.Text;
if (!String.IsNullOrEmpty(text)) {
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
String.Format("{0:0.00}", startTime+duration), text);
if (!string.IsNullOrEmpty(text))
{
Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
string.Format("{0:0.00}", startTime + duration), text);
}
vad.Pop();