Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)

This commit is contained in:
Michael Lamothe
2025-01-04 19:39:06 +11:00
committed by GitHub
parent bf3330c906
commit 8a60985363
29 changed files with 354 additions and 404 deletions

View File

@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks> <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

View File

@@ -4,25 +4,24 @@ using System.IO;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
namespace SherpaOnnx namespace SherpaOnnx;
{
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public struct WaveHeader public struct WaveHeader
{ {
public Int32 ChunkID; public int ChunkID;
public Int32 ChunkSize; public int ChunkSize;
public Int32 Format; public int Format;
public Int32 SubChunk1ID; public int SubChunk1ID;
public Int32 SubChunk1Size; public int SubChunk1Size;
public Int16 AudioFormat; public short AudioFormat;
public Int16 NumChannels; public short NumChannels;
public Int32 SampleRate; public int SampleRate;
public Int32 ByteRate; public int ByteRate;
public Int16 BlockAlign; public short BlockAlign;
public Int16 BitsPerSample; public short BitsPerSample;
public Int32 SubChunk2ID; public int SubChunk2ID;
public Int32 SubChunk2Size; public int SubChunk2Size;
public bool Validate() public bool Validate()
{ {
@@ -90,17 +89,16 @@ namespace SherpaOnnx
// The sample rate can be any value. // The sample rate can be any value.
public class WaveReader public class WaveReader
{ {
public WaveReader(String fileName) public WaveReader(string fileName)
{ {
if (!File.Exists(fileName)) if (!File.Exists(fileName))
{ {
throw new ApplicationException($"{fileName} does not exist!"); throw new ApplicationException($"{fileName} does not exist!");
} }
using (var stream = File.Open(fileName, FileMode.Open)) using var stream = File.Open(fileName, FileMode.Open);
{ using var reader = new BinaryReader(stream);
using (var reader = new BinaryReader(stream))
{
_header = ReadHeader(reader); _header = ReadHeader(reader);
if (!_header.Validate()) if (!_header.Validate())
@@ -113,8 +111,8 @@ namespace SherpaOnnx
// now read samples // now read samples
// _header.SubChunk2Size contains number of bytes in total. // _header.SubChunk2Size contains number of bytes in total.
// we assume each sample is of type int16 // we assume each sample is of type int16
byte[] buffer = reader.ReadBytes(_header.SubChunk2Size); var buffer = reader.ReadBytes(_header.SubChunk2Size);
short[] samples_int16 = new short[_header.SubChunk2Size / 2]; var samples_int16 = new short[_header.SubChunk2Size / 2];
Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length); Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
_samples = new float[samples_int16.Length]; _samples = new float[samples_int16.Length];
@@ -124,12 +122,10 @@ namespace SherpaOnnx
_samples[i] = samples_int16[i] / 32768.0F; _samples[i] = samples_int16[i] / 32768.0F;
} }
} }
}
}
private static WaveHeader ReadHeader(BinaryReader reader) private static WaveHeader ReadHeader(BinaryReader reader)
{ {
byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader))); var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
@@ -142,8 +138,8 @@ namespace SherpaOnnx
{ {
var bs = reader.BaseStream; var bs = reader.BaseStream;
Int32 subChunk2ID = _header.SubChunk2ID; var subChunk2ID = _header.SubChunk2ID;
Int32 subChunk2Size = _header.SubChunk2Size; var subChunk2Size = _header.SubChunk2Size;
while (bs.Position != bs.Length && subChunk2ID != 0x61746164) while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
{ {
@@ -161,14 +157,13 @@ namespace SherpaOnnx
private float[] _samples; private float[] _samples;
public int SampleRate => _header.SampleRate; public int SampleRate => _header.SampleRate;
public float[] Samples => _samples; public float[] Samples => _samples;
public static void Test(String fileName) public static void Test(string fileName)
{ {
WaveReader reader = new WaveReader(fileName); WaveReader reader = new WaveReader(fileName);
Console.WriteLine($"samples length: {reader.Samples.Length}"); Console.WriteLine($"samples length: {reader.Samples.Length}");
Console.WriteLine($"samples rate: {reader.SampleRate}"); Console.WriteLine($"samples rate: {reader.SampleRate}");
} }
} }
}

View File

@@ -13,8 +13,6 @@
// dotnet run // dotnet run
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class KeywordSpotterDemo class KeywordSpotterDemo
{ {
@@ -38,11 +36,11 @@ class KeywordSpotterDemo
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
WaveReader waveReader = new WaveReader(filename); var waveReader = new WaveReader(filename);
Console.WriteLine("----------Use pre-defined keywords----------"); Console.WriteLine("----------Use pre-defined keywords----------");
OnlineStream s = kws.CreateStream(); var s = kws.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
@@ -53,7 +51,7 @@ class KeywordSpotterDemo
{ {
kws.Decode(s); kws.Decode(s);
var result = kws.GetResult(s); var result = kws.GetResult(s);
if (result.Keyword != "") if (result.Keyword != string.Empty)
{ {
Console.WriteLine("Detected: {0}", result.Keyword); Console.WriteLine("Detected: {0}", result.Keyword);
} }
@@ -70,7 +68,7 @@ class KeywordSpotterDemo
{ {
kws.Decode(s); kws.Decode(s);
var result = kws.GetResult(s); var result = kws.GetResult(s);
if (result.Keyword != "") if (result.Keyword != string.Empty)
{ {
Console.WriteLine("Detected: {0}", result.Keyword); Console.WriteLine("Detected: {0}", result.Keyword);
} }
@@ -89,7 +87,7 @@ class KeywordSpotterDemo
{ {
kws.Decode(s); kws.Decode(s);
var result = kws.GetResult(s); var result = kws.GetResult(s);
if (result.Keyword != "") if (result.Keyword != string.Empty)
{ {
Console.WriteLine("Detected: {0}", result.Keyword); Console.WriteLine("Detected: {0}", result.Keyword);
} }

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>keyword_spotting_from_files</RootNamespace> <RootNamespace>keyword_spotting_from_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -12,12 +12,9 @@
// //
// dotnet run // dotnet run
using SherpaOnnx;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System;
using PortAudioSharp; using PortAudioSharp;
using SherpaOnnx;
using System.Runtime.InteropServices;
class KeywordSpotterDemo class KeywordSpotterDemo
{ {
@@ -41,11 +38,11 @@ class KeywordSpotterDemo
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
WaveReader waveReader = new WaveReader(filename); var waveReader = new WaveReader(filename);
Console.WriteLine("----------Use pre-defined keywords----------"); Console.WriteLine("----------Use pre-defined keywords----------");
OnlineStream s = kws.CreateStream(); var s = kws.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText); Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize(); PortAudio.Initialize();
@@ -54,7 +51,7 @@ class KeywordSpotterDemo
for (int i = 0; i != PortAudio.DeviceCount; ++i) for (int i = 0; i != PortAudio.DeviceCount; ++i)
{ {
Console.WriteLine($" Device {i}"); Console.WriteLine($" Device {i}");
DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i); var deviceInfo = PortAudio.GetDeviceInfo(i);
Console.WriteLine($" Name: {deviceInfo.name}"); Console.WriteLine($" Name: {deviceInfo.name}");
Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}"); Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}"); Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
@@ -66,12 +63,12 @@ class KeywordSpotterDemo
Environment.Exit(1); Environment.Exit(1);
} }
DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine(); Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
StreamParameters param = new StreamParameters(); var param = new StreamParameters();
param.device = deviceIndex; param.device = deviceIndex;
param.channelCount = 1; param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32; param.sampleFormat = SampleFormat.Float32;
@@ -79,21 +76,21 @@ class KeywordSpotterDemo
param.hostApiSpecificStreamInfo = IntPtr.Zero; param.hostApiSpecificStreamInfo = IntPtr.Zero;
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
UInt32 frameCount, uint frameCount,
ref StreamCallbackTimeInfo timeInfo, ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags, StreamCallbackFlags statusFlags,
IntPtr userData IntPtr userData
) => ) =>
{ {
float[] samples = new float[frameCount]; var samples = new float[frameCount];
Marshal.Copy(input, samples, 0, (Int32)frameCount); Marshal.Copy(input, samples, 0, (int)frameCount);
s.AcceptWaveform(config.FeatConfig.SampleRate, samples); s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
return StreamCallbackResult.Continue; return StreamCallbackResult.Continue;
}; };
PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate, var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
framesPerBuffer: 0, framesPerBuffer: 0,
streamFlags: StreamFlags.ClipOff, streamFlags: StreamFlags.ClipOff,
callback: callback, callback: callback,
@@ -113,15 +110,13 @@ class KeywordSpotterDemo
} }
var result = kws.GetResult(s); var result = kws.GetResult(s);
if (result.Keyword != "") if (result.Keyword != string.Empty)
{ {
Console.WriteLine("Detected: {0}", result.Keyword); Console.WriteLine("Detected: {0}", result.Keyword);
} }
Thread.Sleep(200); // ms Thread.Sleep(200); // ms
} }
PortAudio.Terminate();
} }
} }

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>keyword_spotting_from_microphone</RootNamespace> <RootNamespace>keyword_spotting_from_microphone</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -5,17 +5,14 @@
// Please refer to // Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// to download non-streaming models // to download non-streaming models
using CommandLine.Text;
using CommandLine; using CommandLine;
using CommandLine.Text;
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class OfflineDecodeFiles class OfflineDecodeFiles
{ {
class Options class Options
{ {
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
public int SampleRate { get; set; } = 16000; public int SampleRate { get; set; } = 16000;
@@ -23,58 +20,58 @@ class OfflineDecodeFiles
public int FeatureDim { get; set; } = 80; public int FeatureDim { get; set; } = 80;
[Option(Required = false, HelpText = "Path to tokens.txt")] [Option(Required = false, HelpText = "Path to tokens.txt")]
public string Tokens { get; set; } = ""; public string Tokens { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
public string Encoder { get; set; } = ""; public string Encoder { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
public string Decoder { get; set; } = ""; public string Decoder { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
public string Joiner { get; set; } = ""; public string Joiner { get; set; } = string.Empty;
[Option("model-type", Required = false, Default = "", HelpText = "model type")] [Option("model-type", Required = false, Default = "", HelpText = "model type")]
public string ModelType { get; set; } = ""; public string ModelType { get; set; } = string.Empty;
[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
public string WhisperEncoder { get; set; } = ""; public string WhisperEncoder { get; set; } = string.Empty;
[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
public string WhisperDecoder { get; set; } = ""; public string WhisperDecoder { get; set; } = string.Empty;
[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")] [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
public string WhisperLanguage { get; set; } = ""; public string WhisperLanguage { get; set; } = string.Empty;
[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")] [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
public string WhisperTask { get; set; } = "transcribe"; public string WhisperTask { get; set; } = "transcribe";
[Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")] [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
public string MoonshinePreprocessor { get; set; } = ""; public string MoonshinePreprocessor { get; set; } = string.Empty;
[Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")] [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
public string MoonshineEncoder { get; set; } = ""; public string MoonshineEncoder { get; set; } = string.Empty;
[Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")] [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
public string MoonshineUncachedDecoder { get; set; } = ""; public string MoonshineUncachedDecoder { get; set; } = string.Empty;
[Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")] [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
public string MoonshineCachedDecoder { get; set; } = ""; public string MoonshineCachedDecoder { get; set; } = string.Empty;
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
public string TdnnModel { get; set; } = ""; public string TdnnModel { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
public string Paraformer { get; set; } = ""; public string Paraformer { get; set; } = string.Empty;
[Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
public string NeMoCtc { get; set; } = ""; public string NeMoCtc { get; set; } = string.Empty;
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
public string TeleSpeechCtc { get; set; } = ""; public string TeleSpeechCtc { get; set; } = string.Empty;
[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
public string SenseVoiceModel { get; set; } = ""; public string SenseVoiceModel { get; set; } = string.Empty;
[Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")] [Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
public int SenseVoiceUseItn { get; set; } = 1; public int SenseVoiceUseItn { get; set; } = 1;
@@ -88,7 +85,7 @@ class OfflineDecodeFiles
[Option("rule-fsts", Required = false, Default = "", [Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")] HelpText = "If not empty, path to rule fst for inverse text normalization")]
public string RuleFsts { get; set; } = ""; public string RuleFsts { get; set; } = string.Empty;
[Option("max-active-paths", Required = false, Default = 4, [Option("max-active-paths", Required = false, Default = 4,
HelpText = @"Used only when --decoding--method is modified_beam_search. HelpText = @"Used only when --decoding--method is modified_beam_search.
@@ -96,7 +93,7 @@ It specifies number of active paths to keep during the search")]
public int MaxActivePaths { get; set; } = 4; public int MaxActivePaths { get; set; } = 4;
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
public string HotwordsFile { get; set; } = ""; public string HotwordsFile { get; set; } = string.Empty;
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
public float HotwordsScore { get; set; } = 1.5F; public float HotwordsScore { get; set; } = 1.5F;
@@ -117,7 +114,7 @@ It specifies number of active paths to keep during the search")]
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{ {
string usage = @" var usage = @"
# Zipformer # Zipformer
dotnet run \ dotnet run \
@@ -213,42 +210,42 @@ to download pre-trained Tdnn models.
config.ModelConfig.Tokens = options.Tokens; config.ModelConfig.Tokens = options.Tokens;
if (!String.IsNullOrEmpty(options.Encoder)) if (!string.IsNullOrEmpty(options.Encoder))
{ {
// this is a transducer model // this is a transducer model
config.ModelConfig.Transducer.Encoder = options.Encoder; config.ModelConfig.Transducer.Encoder = options.Encoder;
config.ModelConfig.Transducer.Decoder = options.Decoder; config.ModelConfig.Transducer.Decoder = options.Decoder;
config.ModelConfig.Transducer.Joiner = options.Joiner; config.ModelConfig.Transducer.Joiner = options.Joiner;
} }
else if (!String.IsNullOrEmpty(options.Paraformer)) else if (!string.IsNullOrEmpty(options.Paraformer))
{ {
config.ModelConfig.Paraformer.Model = options.Paraformer; config.ModelConfig.Paraformer.Model = options.Paraformer;
} }
else if (!String.IsNullOrEmpty(options.NeMoCtc)) else if (!string.IsNullOrEmpty(options.NeMoCtc))
{ {
config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
} }
else if (!String.IsNullOrEmpty(options.TeleSpeechCtc)) else if (!string.IsNullOrEmpty(options.TeleSpeechCtc))
{ {
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
} }
else if (!String.IsNullOrEmpty(options.WhisperEncoder)) else if (!string.IsNullOrEmpty(options.WhisperEncoder))
{ {
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
config.ModelConfig.Whisper.Decoder = options.WhisperDecoder; config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
config.ModelConfig.Whisper.Language = options.WhisperLanguage; config.ModelConfig.Whisper.Language = options.WhisperLanguage;
config.ModelConfig.Whisper.Task = options.WhisperTask; config.ModelConfig.Whisper.Task = options.WhisperTask;
} }
else if (!String.IsNullOrEmpty(options.TdnnModel)) else if (!string.IsNullOrEmpty(options.TdnnModel))
{ {
config.ModelConfig.Tdnn.Model = options.TdnnModel; config.ModelConfig.Tdnn.Model = options.TdnnModel;
} }
else if (!String.IsNullOrEmpty(options.SenseVoiceModel)) else if (!string.IsNullOrEmpty(options.SenseVoiceModel))
{ {
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel; config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn; config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
} }
else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor)) else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor))
{ {
config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor; config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder; config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
@@ -270,17 +267,17 @@ to download pre-trained Tdnn models.
config.ModelConfig.Debug = 0; config.ModelConfig.Debug = 0;
OfflineRecognizer recognizer = new OfflineRecognizer(config); var recognizer = new OfflineRecognizer(config);
string[] files = options.Files.ToArray(); var files = options.Files.ToArray();
// We create a separate stream for each file // We create a separate stream for each file
List<OfflineStream> streams = new List<OfflineStream>(); var streams = new List<OfflineStream>();
streams.EnsureCapacity(files.Length); streams.EnsureCapacity(files.Length);
for (int i = 0; i != files.Length; ++i) for (int i = 0; i != files.Length; ++i)
{ {
OfflineStream s = recognizer.CreateStream(); var s = recognizer.CreateStream();
WaveReader waveReader = new WaveReader(files[i]); WaveReader waveReader = new WaveReader(files[i]);
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
@@ -299,7 +296,7 @@ to download pre-trained Tdnn models.
Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens)); Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
if (r.Timestamps != null && r.Timestamps.Length > 0) { if (r.Timestamps != null && r.Timestamps.Length > 0) {
Console.Write("Timestamps: ["); Console.Write("Timestamps: [");
var sep = ""; var sep = string.Empty;
for (int k = 0; k != r.Timestamps.Length; ++k) for (int k = 0; k != r.Timestamps.Length; ++k)
{ {
Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00")); Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>offline_decode_files</RootNamespace> <RootNamespace>offline_decode_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -12,8 +12,6 @@
// dotnet run // dotnet run
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class OfflinePunctuationDemo class OfflinePunctuationDemo
{ {
@@ -25,14 +23,14 @@ class OfflinePunctuationDemo
config.Model.NumThreads = 1; config.Model.NumThreads = 1;
var punct = new OfflinePunctuation(config); var punct = new OfflinePunctuation(config);
string[] textList = new string[] { var textList = new string[] {
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你", "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
"我们都是木头人不会说话不会动", "我们都是木头人不会说话不会动",
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
}; };
Console.WriteLine("---------"); Console.WriteLine("---------");
foreach (string text in textList) foreach (var text in textList)
{ {
string textWithPunct = punct.AddPunct(text); string textWithPunct = punct.AddPunct(text);
Console.WriteLine("Input text: {0}", text); Console.WriteLine("Input text: {0}", text);

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>offline_punctuation</RootNamespace> <RootNamespace>offline_punctuation</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -34,7 +34,6 @@ Step 4. Run it
*/ */
using SherpaOnnx; using SherpaOnnx;
using System;
class OfflineSpeakerDiarizationDemo class OfflineSpeakerDiarizationDemo
{ {
@@ -54,7 +53,7 @@ class OfflineSpeakerDiarizationDemo
var sd = new OfflineSpeakerDiarization(config); var sd = new OfflineSpeakerDiarization(config);
var testWaveFile = "./0-four-speakers-zh.wav"; var testWaveFile = "./0-four-speakers-zh.wav";
WaveReader waveReader = new WaveReader(testWaveFile); var waveReader = new WaveReader(testWaveFile);
if (sd.SampleRate != waveReader.SampleRate) if (sd.SampleRate != waveReader.SampleRate)
{ {
Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}"); Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
@@ -65,19 +64,19 @@ class OfflineSpeakerDiarizationDemo
// var segments = sd.Process(waveReader.Samples); // this one is also ok // var segments = sd.Process(waveReader.Samples); // this one is also ok
var MyProgressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) => var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) =>
{ {
float progress = 100.0F * numProcessedChunks / numTotalChunks; var progress = 100.0F * numProcessedChunks / numTotalChunks;
Console.WriteLine("Progress {0}%", String.Format("{0:0.00}", progress)); Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress));
return 0; return 0;
}; };
var callback = new OfflineSpeakerDiarizationProgressCallback(MyProgressCallback); var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback);
var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero); var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);
foreach (var s in segments) foreach (var s in segments)
{ {
Console.WriteLine("{0} -- {1} speaker_{2}", String.Format("{0:0.00}", s.Start), String.Format("{0:0.00}", s.End), s.Speaker); Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker);
} }
} }
} }

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>offline_speaker_diarization</RootNamespace> <RootNamespace>offline_speaker_diarization</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -10,15 +10,12 @@
// Note that you need a speaker to run this file since it will play // Note that you need a speaker to run this file since it will play
// the generated audio as it is generating. // the generated audio as it is generating.
using CommandLine.Text;
using CommandLine; using CommandLine;
using CommandLine.Text;
using PortAudioSharp; using PortAudioSharp;
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Threading;
using System;
class OfflineTtsPlayDemo class OfflineTtsPlayDemo
{ {
@@ -26,13 +23,13 @@ class OfflineTtsPlayDemo
{ {
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
public string RuleFsts { get; set; } public string? RuleFsts { get; set; }
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
public string DictDir { get; set; } public string? DictDir { get; set; }
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; } public string? DataDir { get; set; }
[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
public float LengthScale { get; set; } public float LengthScale { get; set; }
@@ -44,10 +41,10 @@ class OfflineTtsPlayDemo
public float NoiseScaleW { get; set; } public float NoiseScaleW { get; set; }
[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
public string Lexicon { get; set; } public string? Lexicon { get; set; }
[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
public string Tokens { get; set; } public string? Tokens { get; set; }
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
public int MaxNumSentences { get; set; } public int MaxNumSentences { get; set; }
@@ -56,16 +53,16 @@ class OfflineTtsPlayDemo
public int Debug { get; set; } public int Debug { get; set; }
[Option("vits-model", Required = true, HelpText = "Path to VITS model")] [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
public string Model { get; set; } public string? Model { get; set; }
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
public int SpeakerId { get; set; } public int SpeakerId { get; set; }
[Option("text", Required = true, HelpText = "Text to synthesize")] [Option("text", Required = true, HelpText = "Text to synthesize")]
public string Text { get; set; } public string? Text { get; set; }
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
public string OutputFilename { get; set; } public string? OutputFilename { get; set; }
} }
static void Main(string[] args) static void Main(string[] args)
@@ -124,10 +121,9 @@ to download more models.
Console.WriteLine(helpText); Console.WriteLine(helpText);
} }
private static void Run(Options options) private static void Run(Options options)
{ {
OfflineTtsConfig config = new OfflineTtsConfig(); var config = new OfflineTtsConfig();
config.Model.Vits.Model = options.Model; config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon; config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens; config.Model.Vits.Tokens = options.Tokens;
@@ -142,10 +138,9 @@ to download more models.
config.RuleFsts = options.RuleFsts; config.RuleFsts = options.RuleFsts;
config.MaxNumSentences = options.MaxNumSentences; config.MaxNumSentences = options.MaxNumSentences;
OfflineTts tts = new OfflineTts(config); var tts = new OfflineTts(config);
float speed = 1.0f / options.LengthScale; var speed = 1.0f / options.LengthScale;
int sid = options.SpeakerId; var sid = options.SpeakerId;
Console.WriteLine(PortAudio.VersionInfo.versionText); Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize(); PortAudio.Initialize();
@@ -166,11 +161,11 @@ to download more models.
Environment.Exit(1); Environment.Exit(1);
} }
DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine(); Console.WriteLine();
Console.WriteLine($"Use output default device {deviceIndex} ({info.name})"); Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
StreamParameters param = new StreamParameters(); var param = new StreamParameters();
param.device = deviceIndex; param.device = deviceIndex;
param.channelCount = 1; param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32; param.sampleFormat = SampleFormat.Float32;
@@ -178,7 +173,7 @@ to download more models.
param.hostApiSpecificStreamInfo = IntPtr.Zero; param.hostApiSpecificStreamInfo = IntPtr.Zero;
// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
BlockingCollection<float[]> dataItems = new BlockingCollection<float[]>(); var dataItems = new BlockingCollection<float[]>();
var MyCallback = (IntPtr samples, int n) => var MyCallback = (IntPtr samples, int n) =>
{ {
@@ -193,9 +188,9 @@ to download more models.
return 1; return 1;
}; };
bool playFinished = false; var playFinished = false;
float[] lastSampleArray = null; float[]? lastSampleArray = null;
int lastIndex = 0; // not played int lastIndex = 0; // not played
PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output, PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
@@ -270,10 +265,10 @@ to download more models.
stream.Start(); stream.Start();
OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback); var callback = new OfflineTtsCallback(MyCallback);
OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback); var audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
bool ok = audio.SaveToWaveFile(options.OutputFilename); var ok = audio.SaveToWaveFile(options.OutputFilename);
if (ok) if (ok)
{ {

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>offline_tts_play</RootNamespace> <RootNamespace>offline_tts_play</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -6,28 +6,25 @@
// and // and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models // to download pre-trained models
using CommandLine.Text;
using CommandLine; using CommandLine;
using CommandLine.Text;
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class OfflineTtsDemo class OfflineTtsDemo
{ {
class Options class Options
{ {
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
public string RuleFsts { get; set; } = ""; public string RuleFsts { get; set; } = string.Empty;
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
public string RuleFars { get; set; } = ""; public string RuleFars { get; set; } = string.Empty;
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
public string DictDir { get; set; } = ""; public string DictDir { get; set; } = string.Empty;
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; } = ""; public string DataDir { get; set; } = string.Empty;
[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
public float LengthScale { get; set; } = 1; public float LengthScale { get; set; } = 1;
@@ -39,10 +36,10 @@ class OfflineTtsDemo
public float NoiseScaleW { get; set; } = 0.8F; public float NoiseScaleW { get; set; } = 0.8F;
[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
public string Lexicon { get; set; } = ""; public string Lexicon { get; set; } = string.Empty;
[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
public string Tokens { get; set; } = ""; public string Tokens { get; set; } = string.Empty;
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
public int MaxNumSentences { get; set; } = 1; public int MaxNumSentences { get; set; } = 1;
@@ -51,13 +48,13 @@ class OfflineTtsDemo
public int Debug { get; set; } = 0; public int Debug { get; set; } = 0;
[Option("vits-model", Required = true, HelpText = "Path to VITS model")] [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
public string Model { get; set; } = ""; public string Model { get; set; } = string.Empty;
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
public int SpeakerId { get; set; } = 0; public int SpeakerId { get; set; } = 0;
[Option("text", Required = true, HelpText = "Text to synthesize")] [Option("text", Required = true, HelpText = "Text to synthesize")]
public string Text { get; set; } = ""; public string Text { get; set; } = string.Empty;
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
public string OutputFilename { get; set; } = "./generated.wav"; public string OutputFilename { get; set; } = "./generated.wav";
@@ -65,7 +62,7 @@ class OfflineTtsDemo
static void Main(string[] args) static void Main(string[] args)
{ {
var parser = new CommandLine.Parser(with => with.HelpWriter = null); var parser = new Parser(with => with.HelpWriter = null);
var parserResult = parser.ParseArguments<Options>(args); var parserResult = parser.ParseArguments<Options>(args);
parserResult parserResult
@@ -75,7 +72,7 @@ class OfflineTtsDemo
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{ {
string usage = @" var usage = @"
# vits-aishell3 # vits-aishell3
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
@@ -122,7 +119,7 @@ to download more models.
private static void Run(Options options) private static void Run(Options options)
{ {
OfflineTtsConfig config = new OfflineTtsConfig(); var config = new OfflineTtsConfig();
config.Model.Vits.Model = options.Model; config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon; config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens; config.Model.Vits.Tokens = options.Tokens;
@@ -138,11 +135,11 @@ to download more models.
config.RuleFars = options.RuleFars; config.RuleFars = options.RuleFars;
config.MaxNumSentences = options.MaxNumSentences; config.MaxNumSentences = options.MaxNumSentences;
OfflineTts tts = new OfflineTts(config); var tts = new OfflineTts(config);
float speed = 1.0f / options.LengthScale; var speed = 1.0f / options.LengthScale;
int sid = options.SpeakerId; var sid = options.SpeakerId;
OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid); var audio = tts.Generate(options.Text, speed, sid);
bool ok = audio.SaveToWaveFile(options.OutputFilename); var ok = audio.SaveToWaveFile(options.OutputFilename);
if (ok) if (ok)
{ {

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>offline_tts</RootNamespace> <RootNamespace>offline_tts</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -6,40 +6,37 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models // to download streaming models
using CommandLine.Text;
using CommandLine; using CommandLine;
using CommandLine.Text;
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System.Linq;
using System;
class OnlineDecodeFiles class OnlineDecodeFiles
{ {
class Options class Options
{ {
[Option(Required = true, HelpText = "Path to tokens.txt")] [Option(Required = true, HelpText = "Path to tokens.txt")]
public string Tokens { get; set; } = ""; public string Tokens { get; set; } = string.Empty;
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
public string Provider { get; set; } = ""; public string Provider { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")] [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
public string Encoder { get; set; } = ""; public string Encoder { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")] [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
public string Decoder { get; set; } = ""; public string Decoder { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")] [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
public string Joiner { get; set; } = ""; public string Joiner { get; set; } = string.Empty;
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
public string ParaformerEncoder { get; set; } = ""; public string ParaformerEncoder { get; set; } = string.Empty;
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
public string ParaformerDecoder { get; set; } = ""; public string ParaformerDecoder { get; set; } = string.Empty;
[Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
public string Zipformer2Ctc { get; set; } = ""; public string Zipformer2Ctc { get; set; } = string.Empty;
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; } = 1; public int NumThreads { get; set; } = 1;
@@ -80,15 +77,14 @@ larger than this value. Used only when --enable-endpoint is true.")]
public float Rule3MinUtteranceLength { get; set; } = 20.0F; public float Rule3MinUtteranceLength { get; set; } = 20.0F;
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
public string HotwordsFile { get; set; } = ""; public string HotwordsFile { get; set; } = string.Empty;
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
public float HotwordsScore { get; set; } = 1.5F; public float HotwordsScore { get; set; } = 1.5F;
[Option("rule-fsts", Required = false, Default = "", [Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")] HelpText = "If not empty, path to rule fst for inverse text normalization")]
public string RuleFsts { get; set; } = ""; public string RuleFsts { get; set; } = string.Empty;
[Option("files", Required = true, HelpText = "Audio files for decoding")] [Option("files", Required = true, HelpText = "Audio files for decoding")]
public IEnumerable<string> Files { get; set; } = new string[] {}; public IEnumerable<string> Files { get; set; } = new string[] {};
@@ -162,7 +158,7 @@ to download pre-trained streaming models.
private static void Run(Options options) private static void Run(Options options)
{ {
OnlineRecognizerConfig config = new OnlineRecognizerConfig(); var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate; config.FeatConfig.SampleRate = options.SampleRate;
// All models from icefall using feature dim 80. // All models from icefall using feature dim 80.
@@ -194,22 +190,22 @@ to download pre-trained streaming models.
config.HotwordsScore = options.HotwordsScore; config.HotwordsScore = options.HotwordsScore;
config.RuleFsts = options.RuleFsts; config.RuleFsts = options.RuleFsts;
OnlineRecognizer recognizer = new OnlineRecognizer(config); var recognizer = new OnlineRecognizer(config);
string[] files = options.Files.ToArray(); var files = options.Files.ToArray();
// We create a separate stream for each file // We create a separate stream for each file
List<OnlineStream> streams = new List<OnlineStream>(); var streams = new List<OnlineStream>();
streams.EnsureCapacity(files.Length); streams.EnsureCapacity(files.Length);
for (int i = 0; i != files.Length; ++i) for (int i = 0; i != files.Length; ++i)
{ {
OnlineStream s = recognizer.CreateStream(); var s = recognizer.CreateStream();
WaveReader waveReader = new WaveReader(files[i]); var waveReader = new WaveReader(files[i]);
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding); s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished(); s.InputFinished();
@@ -230,7 +226,7 @@ to download pre-trained streaming models.
// display results // display results
for (int i = 0; i != files.Length; ++i) for (int i = 0; i != files.Length; ++i)
{ {
OnlineRecognizerResult r = recognizer.GetResult(streams[i]); var r = recognizer.GetResult(streams[i]);
var text = r.Text; var text = r.Text;
var tokens = r.Tokens; var tokens = r.Tokens;
Console.WriteLine("--------------------"); Console.WriteLine("--------------------");
@@ -238,7 +234,7 @@ to download pre-trained streaming models.
Console.WriteLine("text: {0}", text); Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: ["); Console.Write("timestamps: [");
r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]"); Console.WriteLine("]");
} }
Console.WriteLine("--------------------"); Console.WriteLine("--------------------");

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>online_decode_files</RootNamespace> <RootNamespace>online_decode_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -29,9 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files
EndProject EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
EndProject EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
EndProject EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -91,10 +89,6 @@ Global
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU

View File

@@ -16,20 +16,18 @@
// dotnet run // dotnet run
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class SpeakerIdentificationDemo class SpeakerIdentificationDemo
{ {
public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename) public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename)
{ {
WaveReader reader = new WaveReader(filename); var reader = new WaveReader(filename);
OnlineStream stream = extractor.CreateStream(); var stream = extractor.CreateStream();
stream.AcceptWaveform(reader.SampleRate, reader.Samples); stream.AcceptWaveform(reader.SampleRate, reader.Samples);
stream.InputFinished(); stream.InputFinished();
float[] embedding = extractor.Compute(stream); var embedding = extractor.Compute(stream);
return embedding; return embedding;
} }
@@ -43,25 +41,25 @@ class SpeakerIdentificationDemo
var manager = new SpeakerEmbeddingManager(extractor.Dim); var manager = new SpeakerEmbeddingManager(extractor.Dim);
string[] spk1Files = var spk1Files =
new string[] { new string[] {
"./sr-data/enroll/fangjun-sr-1.wav", "./sr-data/enroll/fangjun-sr-1.wav",
"./sr-data/enroll/fangjun-sr-2.wav", "./sr-data/enroll/fangjun-sr-2.wav",
"./sr-data/enroll/fangjun-sr-3.wav", "./sr-data/enroll/fangjun-sr-3.wav",
}; };
float[][] spk1Vec = new float[spk1Files.Length][]; var spk1Vec = new float[spk1Files.Length][];
for (int i = 0; i < spk1Files.Length; ++i) for (int i = 0; i < spk1Files.Length; ++i)
{ {
spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]); spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
} }
string[] spk2Files = var spk2Files =
new string[] { new string[] {
"./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav", "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
}; };
float[][] spk2Vec = new float[spk2Files.Length][]; var spk2Vec = new float[spk2Files.Length][];
for (int i = 0; i < spk2Files.Length; ++i) for (int i = 0; i < spk2Files.Length; ++i)
{ {
@@ -100,14 +98,14 @@ class SpeakerIdentificationDemo
Console.WriteLine("---All speakers---"); Console.WriteLine("---All speakers---");
string[] allSpeakers = manager.GetAllSpeakers(); var allSpeakers = manager.GetAllSpeakers();
foreach (var s in allSpeakers) foreach (var s in allSpeakers)
{ {
Console.WriteLine(s); Console.WriteLine(s);
} }
Console.WriteLine("------------"); Console.WriteLine("------------");
string[] testFiles = var testFiles =
new string[] { new string[] {
"./sr-data/test/fangjun-test-sr-1.wav", "./sr-data/test/fangjun-test-sr-1.wav",
"./sr-data/test/leijun-test-sr-1.wav", "./sr-data/test/leijun-test-sr-1.wav",
@@ -117,9 +115,9 @@ class SpeakerIdentificationDemo
float threshold = 0.6f; float threshold = 0.6f;
foreach (var file in testFiles) foreach (var file in testFiles)
{ {
float[] embedding = ComputeEmbedding(extractor, file); var embedding = ComputeEmbedding(extractor, file);
String name = manager.Search(embedding, threshold); var name = manager.Search(embedding, threshold);
if (name == "") if (name == "")
{ {
name = "<Unknown>"; name = "<Unknown>";

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace> <RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -6,47 +6,43 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models // to download streaming models
using CommandLine.Text;
using CommandLine; using CommandLine;
using CommandLine.Text;
using PortAudioSharp; using PortAudioSharp;
using System.Threading;
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System;
class SpeechRecognitionFromMicrophone class SpeechRecognitionFromMicrophone
{ {
class Options class Options
{ {
[Option(Required = true, HelpText = "Path to tokens.txt")] [Option(Required = true, HelpText = "Path to tokens.txt")]
public string Tokens { get; set; } public string? Tokens { get; set; }
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
public string Provider { get; set; } public string? Provider { get; set; }
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")] [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
public string Encoder { get; set; } public string? Encoder { get; set; }
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")] [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
public string Decoder { get; set; } public string? Decoder { get; set; }
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")] [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
public string Joiner { get; set; } public string? Joiner { get; set; }
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
public string ParaformerEncoder { get; set; } public string? ParaformerEncoder { get; set; }
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
public string ParaformerDecoder { get; set; } public string? ParaformerDecoder { get; set; }
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; } public int NumThreads { get; set; }
[Option("decoding-method", Required = false, Default = "greedy_search", [Option("decoding-method", Required = false, Default = "greedy_search",
HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
public string DecodingMethod { get; set; } public string? DecodingMethod { get; set; }
[Option(Required = false, Default = false, HelpText = "True to show model info during loading")] [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
public bool Debug { get; set; } public bool Debug { get; set; }
@@ -126,7 +122,7 @@ to download pre-trained streaming models.
private static void Run(Options options) private static void Run(Options options)
{ {
OnlineRecognizerConfig config = new OnlineRecognizerConfig(); var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate; config.FeatConfig.SampleRate = options.SampleRate;
// All models from icefall using feature dim 80. // All models from icefall using feature dim 80.
@@ -153,9 +149,9 @@ to download pre-trained streaming models.
config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence; config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
OnlineRecognizer recognizer = new OnlineRecognizer(config); var recognizer = new OnlineRecognizer(config);
OnlineStream s = recognizer.CreateStream(); var s = recognizer.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText); Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize(); PortAudio.Initialize();
@@ -176,12 +172,12 @@ to download pre-trained streaming models.
Environment.Exit(1); Environment.Exit(1);
} }
DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine(); Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
StreamParameters param = new StreamParameters(); var param = new StreamParameters();
param.device = deviceIndex; param.device = deviceIndex;
param.channelCount = 1; param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32; param.sampleFormat = SampleFormat.Float32;
@@ -189,14 +185,14 @@ to download pre-trained streaming models.
param.hostApiSpecificStreamInfo = IntPtr.Zero; param.hostApiSpecificStreamInfo = IntPtr.Zero;
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
UInt32 frameCount, uint frameCount,
ref StreamCallbackTimeInfo timeInfo, ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags, StreamCallbackFlags statusFlags,
IntPtr userData IntPtr userData
) => ) =>
{ {
float[] samples = new float[frameCount]; var samples = new float[frameCount];
Marshal.Copy(input, samples, 0, (Int32)frameCount); Marshal.Copy(input, samples, 0, (int)frameCount);
s.AcceptWaveform(options.SampleRate, samples); s.AcceptWaveform(options.SampleRate, samples);
@@ -215,7 +211,7 @@ to download pre-trained streaming models.
stream.Start(); stream.Start();
String lastText = ""; var lastText = string.Empty;
int segmentIndex = 0; int segmentIndex = 0;
while (true) while (true)
@@ -245,9 +241,5 @@ to download pre-trained streaming models.
Thread.Sleep(200); // ms Thread.Sleep(200); // ms
} }
PortAudio.Terminate();
} }
} }

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>speech_recognition_from_microphone</RootNamespace> <RootNamespace>speech_recognition_from_microphone</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -15,12 +15,9 @@
// dotnet run // dotnet run
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class SpokenLanguageIdentificationDemo class SpokenLanguageIdentificationDemo
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
var config = new SpokenLanguageIdentificationConfig(); var config = new SpokenLanguageIdentificationConfig();
@@ -30,7 +27,7 @@ class SpokenLanguageIdentificationDemo
var slid = new SpokenLanguageIdentification(config); var slid = new SpokenLanguageIdentification(config);
var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
WaveReader waveReader = new WaveReader(filename); var waveReader = new WaveReader(filename);
var s = slid.CreateStream(); var s = slid.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>spoken_language_identification</RootNamespace> <RootNamespace>spoken_language_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -13,12 +13,9 @@
// dotnet run // dotnet run
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class StreamingHlgDecodingDemo class StreamingHlgDecodingDemo
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
var config = new OnlineRecognizerConfig(); var config = new OnlineRecognizerConfig();
@@ -32,15 +29,15 @@ class StreamingHlgDecodingDemo
config.ModelConfig.Debug = 0; config.ModelConfig.Debug = 0;
config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"; config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
OnlineRecognizer recognizer = new OnlineRecognizer(config); var recognizer = new OnlineRecognizer(config);
var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
WaveReader waveReader = new WaveReader(filename); var waveReader = new WaveReader(filename);
OnlineStream s = recognizer.CreateStream(); var s = recognizer.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding); s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished(); s.InputFinished();
@@ -49,7 +46,7 @@ class StreamingHlgDecodingDemo
recognizer.Decode(s); recognizer.Decode(s);
} }
OnlineRecognizerResult r = recognizer.GetResult(s); var r = recognizer.GetResult(s);
var text = r.Text; var text = r.Text;
var tokens = r.Tokens; var tokens = r.Tokens;
Console.WriteLine("--------------------"); Console.WriteLine("--------------------");
@@ -57,10 +54,8 @@ class StreamingHlgDecodingDemo
Console.WriteLine("text: {0}", text); Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: ["); Console.Write("timestamps: [");
r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]"); Console.WriteLine("]");
Console.WriteLine("--------------------"); Console.WriteLine("--------------------");
} }
} }

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>streaming_hlg_decoding</RootNamespace> <RootNamespace>streaming_hlg_decoding</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@@ -3,8 +3,6 @@
// This file shows how to use a silero_vad model with a non-streaming Paraformer // This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition. // for speech recognition.
using SherpaOnnx; using SherpaOnnx;
using System.Collections.Generic;
using System;
class VadNonStreamingAsrParaformer class VadNonStreamingAsrParaformer
{ {
@@ -12,45 +10,49 @@ class VadNonStreamingAsrParaformer
{ {
// please download model files from // please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
OfflineRecognizerConfig config = new OfflineRecognizerConfig(); var config = new OfflineRecognizerConfig();
config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"; config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"; config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
config.ModelConfig.Debug = 0; config.ModelConfig.Debug = 0;
OfflineRecognizer recognizer = new OfflineRecognizer(config); var recognizer = new OfflineRecognizer(config);
VadModelConfig vadModelConfig = new VadModelConfig(); var vadModelConfig = new VadModelConfig();
vadModelConfig.SileroVad.Model = "./silero_vad.onnx"; vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
vadModelConfig.Debug = 0; vadModelConfig.Debug = 0;
VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60); var vad = new VoiceActivityDetector(vadModelConfig, 60);
string testWaveFilename = "./lei-jun-test.wav"; var testWaveFilename = "./lei-jun-test.wav";
WaveReader reader = new WaveReader(testWaveFilename); var reader = new WaveReader(testWaveFilename);
int numSamples = reader.Samples.Length; int numSamples = reader.Samples.Length;
int windowSize = vadModelConfig.SileroVad.WindowSize; int windowSize = vadModelConfig.SileroVad.WindowSize;
int sampleRate = vadModelConfig.SampleRate; int sampleRate = vadModelConfig.SampleRate;
int numIter = numSamples / windowSize; int numIter = numSamples / windowSize;
for (int i = 0; i != numIter; ++i) { for (int i = 0; i != numIter; ++i)
{
int start = i * windowSize; int start = i * windowSize;
float[] samples = new float[windowSize]; var samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize); Array.Copy(reader.Samples, start, samples, 0, windowSize);
vad.AcceptWaveform(samples); vad.AcceptWaveform(samples);
if (vad.IsSpeechDetected()) { if (vad.IsSpeechDetected())
while (!vad.IsEmpty()) { {
while (!vad.IsEmpty())
{
SpeechSegment segment = vad.Front(); SpeechSegment segment = vad.Front();
float startTime = segment.Start / (float)sampleRate; var startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate; var duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = recognizer.CreateStream(); OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples); stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream); recognizer.Decode(stream);
String text = stream.Result.Text; var text = stream.Result.Text;
if (!String.IsNullOrEmpty(text)) { if (!string.IsNullOrEmpty(text))
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime), {
String.Format("{0:0.00}", startTime+duration), text); Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
string.Format("{0:0.00}", startTime + duration), text);
} }
vad.Pop(); vad.Pop();
@@ -60,19 +62,21 @@ class VadNonStreamingAsrParaformer
vad.Flush(); vad.Flush();
while (!vad.IsEmpty()) { while (!vad.IsEmpty())
SpeechSegment segment = vad.Front(); {
var segment = vad.Front();
float startTime = segment.Start / (float)sampleRate; float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate; float duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = recognizer.CreateStream(); var stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples); stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream); recognizer.Decode(stream);
String text = stream.Result.Text; var text = stream.Result.Text;
if (!String.IsNullOrEmpty(text)) { if (!string.IsNullOrEmpty(text))
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime), {
String.Format("{0:0.00}", startTime+duration), text); Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
string.Format("{0:0.00}", startTime + duration), text);
} }
vad.Pop(); vad.Pop();

View File

@@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace> <RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>