Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
<Project Sdk="Microsoft.NET.Sdk">
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|||||||
@@ -4,25 +4,24 @@ using System.IO;
|
|||||||
|
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
namespace SherpaOnnx
|
namespace SherpaOnnx;
|
||||||
{
|
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential)]
|
[StructLayout(LayoutKind.Sequential)]
|
||||||
public struct WaveHeader
|
public struct WaveHeader
|
||||||
{
|
{
|
||||||
public Int32 ChunkID;
|
public int ChunkID;
|
||||||
public Int32 ChunkSize;
|
public int ChunkSize;
|
||||||
public Int32 Format;
|
public int Format;
|
||||||
public Int32 SubChunk1ID;
|
public int SubChunk1ID;
|
||||||
public Int32 SubChunk1Size;
|
public int SubChunk1Size;
|
||||||
public Int16 AudioFormat;
|
public short AudioFormat;
|
||||||
public Int16 NumChannels;
|
public short NumChannels;
|
||||||
public Int32 SampleRate;
|
public int SampleRate;
|
||||||
public Int32 ByteRate;
|
public int ByteRate;
|
||||||
public Int16 BlockAlign;
|
public short BlockAlign;
|
||||||
public Int16 BitsPerSample;
|
public short BitsPerSample;
|
||||||
public Int32 SubChunk2ID;
|
public int SubChunk2ID;
|
||||||
public Int32 SubChunk2Size;
|
public int SubChunk2Size;
|
||||||
|
|
||||||
public bool Validate()
|
public bool Validate()
|
||||||
{
|
{
|
||||||
@@ -90,17 +89,16 @@ namespace SherpaOnnx
|
|||||||
// The sample rate can be any value.
|
// The sample rate can be any value.
|
||||||
public class WaveReader
|
public class WaveReader
|
||||||
{
|
{
|
||||||
public WaveReader(String fileName)
|
public WaveReader(string fileName)
|
||||||
{
|
{
|
||||||
if (!File.Exists(fileName))
|
if (!File.Exists(fileName))
|
||||||
{
|
{
|
||||||
throw new ApplicationException($"{fileName} does not exist!");
|
throw new ApplicationException($"{fileName} does not exist!");
|
||||||
}
|
}
|
||||||
|
|
||||||
using (var stream = File.Open(fileName, FileMode.Open))
|
using var stream = File.Open(fileName, FileMode.Open);
|
||||||
{
|
using var reader = new BinaryReader(stream);
|
||||||
using (var reader = new BinaryReader(stream))
|
|
||||||
{
|
|
||||||
_header = ReadHeader(reader);
|
_header = ReadHeader(reader);
|
||||||
|
|
||||||
if (!_header.Validate())
|
if (!_header.Validate())
|
||||||
@@ -113,8 +111,8 @@ namespace SherpaOnnx
|
|||||||
// now read samples
|
// now read samples
|
||||||
// _header.SubChunk2Size contains number of bytes in total.
|
// _header.SubChunk2Size contains number of bytes in total.
|
||||||
// we assume each sample is of type int16
|
// we assume each sample is of type int16
|
||||||
byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);
|
var buffer = reader.ReadBytes(_header.SubChunk2Size);
|
||||||
short[] samples_int16 = new short[_header.SubChunk2Size / 2];
|
var samples_int16 = new short[_header.SubChunk2Size / 2];
|
||||||
Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
|
Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
|
||||||
|
|
||||||
_samples = new float[samples_int16.Length];
|
_samples = new float[samples_int16.Length];
|
||||||
@@ -124,12 +122,10 @@ namespace SherpaOnnx
|
|||||||
_samples[i] = samples_int16[i] / 32768.0F;
|
_samples[i] = samples_int16[i] / 32768.0F;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static WaveHeader ReadHeader(BinaryReader reader)
|
private static WaveHeader ReadHeader(BinaryReader reader)
|
||||||
{
|
{
|
||||||
byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
|
var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
|
||||||
|
|
||||||
GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
|
GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
|
||||||
WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
|
WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
|
||||||
@@ -142,8 +138,8 @@ namespace SherpaOnnx
|
|||||||
{
|
{
|
||||||
var bs = reader.BaseStream;
|
var bs = reader.BaseStream;
|
||||||
|
|
||||||
Int32 subChunk2ID = _header.SubChunk2ID;
|
var subChunk2ID = _header.SubChunk2ID;
|
||||||
Int32 subChunk2Size = _header.SubChunk2Size;
|
var subChunk2Size = _header.SubChunk2Size;
|
||||||
|
|
||||||
while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
|
while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
|
||||||
{
|
{
|
||||||
@@ -161,14 +157,13 @@ namespace SherpaOnnx
|
|||||||
private float[] _samples;
|
private float[] _samples;
|
||||||
|
|
||||||
public int SampleRate => _header.SampleRate;
|
public int SampleRate => _header.SampleRate;
|
||||||
|
|
||||||
public float[] Samples => _samples;
|
public float[] Samples => _samples;
|
||||||
|
|
||||||
public static void Test(String fileName)
|
public static void Test(string fileName)
|
||||||
{
|
{
|
||||||
WaveReader reader = new WaveReader(fileName);
|
WaveReader reader = new WaveReader(fileName);
|
||||||
Console.WriteLine($"samples length: {reader.Samples.Length}");
|
Console.WriteLine($"samples length: {reader.Samples.Length}");
|
||||||
Console.WriteLine($"samples rate: {reader.SampleRate}");
|
Console.WriteLine($"samples rate: {reader.SampleRate}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
// dotnet run
|
// dotnet run
|
||||||
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class KeywordSpotterDemo
|
class KeywordSpotterDemo
|
||||||
{
|
{
|
||||||
@@ -38,11 +36,11 @@ class KeywordSpotterDemo
|
|||||||
|
|
||||||
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
|
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
|
||||||
|
|
||||||
WaveReader waveReader = new WaveReader(filename);
|
var waveReader = new WaveReader(filename);
|
||||||
|
|
||||||
Console.WriteLine("----------Use pre-defined keywords----------");
|
Console.WriteLine("----------Use pre-defined keywords----------");
|
||||||
|
|
||||||
OnlineStream s = kws.CreateStream();
|
var s = kws.CreateStream();
|
||||||
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
||||||
|
|
||||||
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
|
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
|
||||||
@@ -53,7 +51,7 @@ class KeywordSpotterDemo
|
|||||||
{
|
{
|
||||||
kws.Decode(s);
|
kws.Decode(s);
|
||||||
var result = kws.GetResult(s);
|
var result = kws.GetResult(s);
|
||||||
if (result.Keyword != "")
|
if (result.Keyword != string.Empty)
|
||||||
{
|
{
|
||||||
Console.WriteLine("Detected: {0}", result.Keyword);
|
Console.WriteLine("Detected: {0}", result.Keyword);
|
||||||
}
|
}
|
||||||
@@ -70,7 +68,7 @@ class KeywordSpotterDemo
|
|||||||
{
|
{
|
||||||
kws.Decode(s);
|
kws.Decode(s);
|
||||||
var result = kws.GetResult(s);
|
var result = kws.GetResult(s);
|
||||||
if (result.Keyword != "")
|
if (result.Keyword != string.Empty)
|
||||||
{
|
{
|
||||||
Console.WriteLine("Detected: {0}", result.Keyword);
|
Console.WriteLine("Detected: {0}", result.Keyword);
|
||||||
}
|
}
|
||||||
@@ -89,7 +87,7 @@ class KeywordSpotterDemo
|
|||||||
{
|
{
|
||||||
kws.Decode(s);
|
kws.Decode(s);
|
||||||
var result = kws.GetResult(s);
|
var result = kws.GetResult(s);
|
||||||
if (result.Keyword != "")
|
if (result.Keyword != string.Empty)
|
||||||
{
|
{
|
||||||
Console.WriteLine("Detected: {0}", result.Keyword);
|
Console.WriteLine("Detected: {0}", result.Keyword);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>keyword_spotting_from_files</RootNamespace>
|
<RootNamespace>keyword_spotting_from_files</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -12,12 +12,9 @@
|
|||||||
//
|
//
|
||||||
// dotnet run
|
// dotnet run
|
||||||
|
|
||||||
using SherpaOnnx;
|
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Runtime.InteropServices;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
using PortAudioSharp;
|
using PortAudioSharp;
|
||||||
|
using SherpaOnnx;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
class KeywordSpotterDemo
|
class KeywordSpotterDemo
|
||||||
{
|
{
|
||||||
@@ -41,11 +38,11 @@ class KeywordSpotterDemo
|
|||||||
|
|
||||||
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
|
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
|
||||||
|
|
||||||
WaveReader waveReader = new WaveReader(filename);
|
var waveReader = new WaveReader(filename);
|
||||||
|
|
||||||
Console.WriteLine("----------Use pre-defined keywords----------");
|
Console.WriteLine("----------Use pre-defined keywords----------");
|
||||||
|
|
||||||
OnlineStream s = kws.CreateStream();
|
var s = kws.CreateStream();
|
||||||
|
|
||||||
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
||||||
PortAudio.Initialize();
|
PortAudio.Initialize();
|
||||||
@@ -54,7 +51,7 @@ class KeywordSpotterDemo
|
|||||||
for (int i = 0; i != PortAudio.DeviceCount; ++i)
|
for (int i = 0; i != PortAudio.DeviceCount; ++i)
|
||||||
{
|
{
|
||||||
Console.WriteLine($" Device {i}");
|
Console.WriteLine($" Device {i}");
|
||||||
DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
|
var deviceInfo = PortAudio.GetDeviceInfo(i);
|
||||||
Console.WriteLine($" Name: {deviceInfo.name}");
|
Console.WriteLine($" Name: {deviceInfo.name}");
|
||||||
Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
|
Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
|
||||||
Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
|
Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
|
||||||
@@ -66,12 +63,12 @@ class KeywordSpotterDemo
|
|||||||
Environment.Exit(1);
|
Environment.Exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
|
var info = PortAudio.GetDeviceInfo(deviceIndex);
|
||||||
|
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
|
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
|
||||||
|
|
||||||
StreamParameters param = new StreamParameters();
|
var param = new StreamParameters();
|
||||||
param.device = deviceIndex;
|
param.device = deviceIndex;
|
||||||
param.channelCount = 1;
|
param.channelCount = 1;
|
||||||
param.sampleFormat = SampleFormat.Float32;
|
param.sampleFormat = SampleFormat.Float32;
|
||||||
@@ -79,21 +76,21 @@ class KeywordSpotterDemo
|
|||||||
param.hostApiSpecificStreamInfo = IntPtr.Zero;
|
param.hostApiSpecificStreamInfo = IntPtr.Zero;
|
||||||
|
|
||||||
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
|
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
|
||||||
UInt32 frameCount,
|
uint frameCount,
|
||||||
ref StreamCallbackTimeInfo timeInfo,
|
ref StreamCallbackTimeInfo timeInfo,
|
||||||
StreamCallbackFlags statusFlags,
|
StreamCallbackFlags statusFlags,
|
||||||
IntPtr userData
|
IntPtr userData
|
||||||
) =>
|
) =>
|
||||||
{
|
{
|
||||||
float[] samples = new float[frameCount];
|
var samples = new float[frameCount];
|
||||||
Marshal.Copy(input, samples, 0, (Int32)frameCount);
|
Marshal.Copy(input, samples, 0, (int)frameCount);
|
||||||
|
|
||||||
s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
|
s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
|
||||||
|
|
||||||
return StreamCallbackResult.Continue;
|
return StreamCallbackResult.Continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
|
var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
|
||||||
framesPerBuffer: 0,
|
framesPerBuffer: 0,
|
||||||
streamFlags: StreamFlags.ClipOff,
|
streamFlags: StreamFlags.ClipOff,
|
||||||
callback: callback,
|
callback: callback,
|
||||||
@@ -113,15 +110,13 @@ class KeywordSpotterDemo
|
|||||||
}
|
}
|
||||||
|
|
||||||
var result = kws.GetResult(s);
|
var result = kws.GetResult(s);
|
||||||
if (result.Keyword != "")
|
if (result.Keyword != string.Empty)
|
||||||
{
|
{
|
||||||
Console.WriteLine("Detected: {0}", result.Keyword);
|
Console.WriteLine("Detected: {0}", result.Keyword);
|
||||||
}
|
}
|
||||||
|
|
||||||
Thread.Sleep(200); // ms
|
Thread.Sleep(200); // ms
|
||||||
}
|
}
|
||||||
|
|
||||||
PortAudio.Terminate();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>keyword_spotting_from_microphone</RootNamespace>
|
<RootNamespace>keyword_spotting_from_microphone</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -5,17 +5,14 @@
|
|||||||
// Please refer to
|
// Please refer to
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// to download non-streaming models
|
// to download non-streaming models
|
||||||
using CommandLine.Text;
|
|
||||||
using CommandLine;
|
using CommandLine;
|
||||||
|
using CommandLine.Text;
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class OfflineDecodeFiles
|
class OfflineDecodeFiles
|
||||||
{
|
{
|
||||||
class Options
|
class Options
|
||||||
{
|
{
|
||||||
|
|
||||||
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
|
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
|
||||||
public int SampleRate { get; set; } = 16000;
|
public int SampleRate { get; set; } = 16000;
|
||||||
|
|
||||||
@@ -23,58 +20,58 @@ class OfflineDecodeFiles
|
|||||||
public int FeatureDim { get; set; } = 80;
|
public int FeatureDim { get; set; } = 80;
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to tokens.txt")]
|
[Option(Required = false, HelpText = "Path to tokens.txt")]
|
||||||
public string Tokens { get; set; } = "";
|
public string Tokens { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
|
[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
|
||||||
public string Encoder { get; set; } = "";
|
public string Encoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
|
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
|
||||||
public string Decoder { get; set; } = "";
|
public string Decoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
|
[Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
|
||||||
public string Joiner { get; set; } = "";
|
public string Joiner { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("model-type", Required = false, Default = "", HelpText = "model type")]
|
[Option("model-type", Required = false, Default = "", HelpText = "model type")]
|
||||||
public string ModelType { get; set; } = "";
|
public string ModelType { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
|
[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
|
||||||
public string WhisperEncoder { get; set; } = "";
|
public string WhisperEncoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
|
[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
|
||||||
public string WhisperDecoder { get; set; } = "";
|
public string WhisperDecoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
|
[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
|
||||||
public string WhisperLanguage { get; set; } = "";
|
public string WhisperLanguage { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
|
[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
|
||||||
public string WhisperTask { get; set; } = "transcribe";
|
public string WhisperTask { get; set; } = "transcribe";
|
||||||
|
|
||||||
[Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
|
[Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
|
||||||
public string MoonshinePreprocessor { get; set; } = "";
|
public string MoonshinePreprocessor { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
|
[Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
|
||||||
public string MoonshineEncoder { get; set; } = "";
|
public string MoonshineEncoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
|
[Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
|
||||||
public string MoonshineUncachedDecoder { get; set; } = "";
|
public string MoonshineUncachedDecoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
|
[Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
|
||||||
public string MoonshineCachedDecoder { get; set; } = "";
|
public string MoonshineCachedDecoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
|
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
|
||||||
public string TdnnModel { get; set; } = "";
|
public string TdnnModel { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
|
[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
|
||||||
public string Paraformer { get; set; } = "";
|
public string Paraformer { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
|
[Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
|
||||||
public string NeMoCtc { get; set; } = "";
|
public string NeMoCtc { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
|
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
|
||||||
public string TeleSpeechCtc { get; set; } = "";
|
public string TeleSpeechCtc { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
|
[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
|
||||||
public string SenseVoiceModel { get; set; } = "";
|
public string SenseVoiceModel { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
|
[Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
|
||||||
public int SenseVoiceUseItn { get; set; } = 1;
|
public int SenseVoiceUseItn { get; set; } = 1;
|
||||||
@@ -88,7 +85,7 @@ class OfflineDecodeFiles
|
|||||||
|
|
||||||
[Option("rule-fsts", Required = false, Default = "",
|
[Option("rule-fsts", Required = false, Default = "",
|
||||||
HelpText = "If not empty, path to rule fst for inverse text normalization")]
|
HelpText = "If not empty, path to rule fst for inverse text normalization")]
|
||||||
public string RuleFsts { get; set; } = "";
|
public string RuleFsts { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("max-active-paths", Required = false, Default = 4,
|
[Option("max-active-paths", Required = false, Default = 4,
|
||||||
HelpText = @"Used only when --decoding--method is modified_beam_search.
|
HelpText = @"Used only when --decoding--method is modified_beam_search.
|
||||||
@@ -96,7 +93,7 @@ It specifies number of active paths to keep during the search")]
|
|||||||
public int MaxActivePaths { get; set; } = 4;
|
public int MaxActivePaths { get; set; } = 4;
|
||||||
|
|
||||||
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
|
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
|
||||||
public string HotwordsFile { get; set; } = "";
|
public string HotwordsFile { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
|
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
|
||||||
public float HotwordsScore { get; set; } = 1.5F;
|
public float HotwordsScore { get; set; } = 1.5F;
|
||||||
@@ -117,7 +114,7 @@ It specifies number of active paths to keep during the search")]
|
|||||||
|
|
||||||
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
||||||
{
|
{
|
||||||
string usage = @"
|
var usage = @"
|
||||||
# Zipformer
|
# Zipformer
|
||||||
|
|
||||||
dotnet run \
|
dotnet run \
|
||||||
@@ -213,42 +210,42 @@ to download pre-trained Tdnn models.
|
|||||||
|
|
||||||
config.ModelConfig.Tokens = options.Tokens;
|
config.ModelConfig.Tokens = options.Tokens;
|
||||||
|
|
||||||
if (!String.IsNullOrEmpty(options.Encoder))
|
if (!string.IsNullOrEmpty(options.Encoder))
|
||||||
{
|
{
|
||||||
// this is a transducer model
|
// this is a transducer model
|
||||||
config.ModelConfig.Transducer.Encoder = options.Encoder;
|
config.ModelConfig.Transducer.Encoder = options.Encoder;
|
||||||
config.ModelConfig.Transducer.Decoder = options.Decoder;
|
config.ModelConfig.Transducer.Decoder = options.Decoder;
|
||||||
config.ModelConfig.Transducer.Joiner = options.Joiner;
|
config.ModelConfig.Transducer.Joiner = options.Joiner;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.Paraformer))
|
else if (!string.IsNullOrEmpty(options.Paraformer))
|
||||||
{
|
{
|
||||||
config.ModelConfig.Paraformer.Model = options.Paraformer;
|
config.ModelConfig.Paraformer.Model = options.Paraformer;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.NeMoCtc))
|
else if (!string.IsNullOrEmpty(options.NeMoCtc))
|
||||||
{
|
{
|
||||||
config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
|
config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.TeleSpeechCtc))
|
else if (!string.IsNullOrEmpty(options.TeleSpeechCtc))
|
||||||
{
|
{
|
||||||
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
|
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.WhisperEncoder))
|
else if (!string.IsNullOrEmpty(options.WhisperEncoder))
|
||||||
{
|
{
|
||||||
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
|
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
|
||||||
config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
|
config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
|
||||||
config.ModelConfig.Whisper.Language = options.WhisperLanguage;
|
config.ModelConfig.Whisper.Language = options.WhisperLanguage;
|
||||||
config.ModelConfig.Whisper.Task = options.WhisperTask;
|
config.ModelConfig.Whisper.Task = options.WhisperTask;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.TdnnModel))
|
else if (!string.IsNullOrEmpty(options.TdnnModel))
|
||||||
{
|
{
|
||||||
config.ModelConfig.Tdnn.Model = options.TdnnModel;
|
config.ModelConfig.Tdnn.Model = options.TdnnModel;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.SenseVoiceModel))
|
else if (!string.IsNullOrEmpty(options.SenseVoiceModel))
|
||||||
{
|
{
|
||||||
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
|
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
|
||||||
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
|
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
|
||||||
}
|
}
|
||||||
else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor))
|
else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor))
|
||||||
{
|
{
|
||||||
config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
|
config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
|
||||||
config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
|
config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
|
||||||
@@ -270,17 +267,17 @@ to download pre-trained Tdnn models.
|
|||||||
|
|
||||||
config.ModelConfig.Debug = 0;
|
config.ModelConfig.Debug = 0;
|
||||||
|
|
||||||
OfflineRecognizer recognizer = new OfflineRecognizer(config);
|
var recognizer = new OfflineRecognizer(config);
|
||||||
|
|
||||||
string[] files = options.Files.ToArray();
|
var files = options.Files.ToArray();
|
||||||
|
|
||||||
// We create a separate stream for each file
|
// We create a separate stream for each file
|
||||||
List<OfflineStream> streams = new List<OfflineStream>();
|
var streams = new List<OfflineStream>();
|
||||||
streams.EnsureCapacity(files.Length);
|
streams.EnsureCapacity(files.Length);
|
||||||
|
|
||||||
for (int i = 0; i != files.Length; ++i)
|
for (int i = 0; i != files.Length; ++i)
|
||||||
{
|
{
|
||||||
OfflineStream s = recognizer.CreateStream();
|
var s = recognizer.CreateStream();
|
||||||
|
|
||||||
WaveReader waveReader = new WaveReader(files[i]);
|
WaveReader waveReader = new WaveReader(files[i]);
|
||||||
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
||||||
@@ -299,7 +296,7 @@ to download pre-trained Tdnn models.
|
|||||||
Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
|
Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
|
||||||
if (r.Timestamps != null && r.Timestamps.Length > 0) {
|
if (r.Timestamps != null && r.Timestamps.Length > 0) {
|
||||||
Console.Write("Timestamps: [");
|
Console.Write("Timestamps: [");
|
||||||
var sep = "";
|
var sep = string.Empty;
|
||||||
for (int k = 0; k != r.Timestamps.Length; ++k)
|
for (int k = 0; k != r.Timestamps.Length; ++k)
|
||||||
{
|
{
|
||||||
Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
|
Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>offline_decode_files</RootNamespace>
|
<RootNamespace>offline_decode_files</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
// dotnet run
|
// dotnet run
|
||||||
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class OfflinePunctuationDemo
|
class OfflinePunctuationDemo
|
||||||
{
|
{
|
||||||
@@ -25,14 +23,14 @@ class OfflinePunctuationDemo
|
|||||||
config.Model.NumThreads = 1;
|
config.Model.NumThreads = 1;
|
||||||
var punct = new OfflinePunctuation(config);
|
var punct = new OfflinePunctuation(config);
|
||||||
|
|
||||||
string[] textList = new string[] {
|
var textList = new string[] {
|
||||||
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
|
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
|
||||||
"我们都是木头人不会说话不会动",
|
"我们都是木头人不会说话不会动",
|
||||||
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
|
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
|
||||||
};
|
};
|
||||||
|
|
||||||
Console.WriteLine("---------");
|
Console.WriteLine("---------");
|
||||||
foreach (string text in textList)
|
foreach (var text in textList)
|
||||||
{
|
{
|
||||||
string textWithPunct = punct.AddPunct(text);
|
string textWithPunct = punct.AddPunct(text);
|
||||||
Console.WriteLine("Input text: {0}", text);
|
Console.WriteLine("Input text: {0}", text);
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>offline_punctuation</RootNamespace>
|
<RootNamespace>offline_punctuation</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ Step 4. Run it
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System;
|
|
||||||
|
|
||||||
class OfflineSpeakerDiarizationDemo
|
class OfflineSpeakerDiarizationDemo
|
||||||
{
|
{
|
||||||
@@ -54,7 +53,7 @@ class OfflineSpeakerDiarizationDemo
|
|||||||
var sd = new OfflineSpeakerDiarization(config);
|
var sd = new OfflineSpeakerDiarization(config);
|
||||||
|
|
||||||
var testWaveFile = "./0-four-speakers-zh.wav";
|
var testWaveFile = "./0-four-speakers-zh.wav";
|
||||||
WaveReader waveReader = new WaveReader(testWaveFile);
|
var waveReader = new WaveReader(testWaveFile);
|
||||||
if (sd.SampleRate != waveReader.SampleRate)
|
if (sd.SampleRate != waveReader.SampleRate)
|
||||||
{
|
{
|
||||||
Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
|
Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
|
||||||
@@ -65,19 +64,19 @@ class OfflineSpeakerDiarizationDemo
|
|||||||
|
|
||||||
// var segments = sd.Process(waveReader.Samples); // this one is also ok
|
// var segments = sd.Process(waveReader.Samples); // this one is also ok
|
||||||
|
|
||||||
var MyProgressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) =>
|
var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) =>
|
||||||
{
|
{
|
||||||
float progress = 100.0F * numProcessedChunks / numTotalChunks;
|
var progress = 100.0F * numProcessedChunks / numTotalChunks;
|
||||||
Console.WriteLine("Progress {0}%", String.Format("{0:0.00}", progress));
|
Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress));
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
var callback = new OfflineSpeakerDiarizationProgressCallback(MyProgressCallback);
|
var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback);
|
||||||
var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);
|
var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);
|
||||||
|
|
||||||
foreach (var s in segments)
|
foreach (var s in segments)
|
||||||
{
|
{
|
||||||
Console.WriteLine("{0} -- {1} speaker_{2}", String.Format("{0:0.00}", s.Start), String.Format("{0:0.00}", s.End), s.Speaker);
|
Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>offline_speaker_diarization</RootNamespace>
|
<RootNamespace>offline_speaker_diarization</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -10,15 +10,12 @@
|
|||||||
// Note that you need a speaker to run this file since it will play
|
// Note that you need a speaker to run this file since it will play
|
||||||
// the generated audio as it is generating.
|
// the generated audio as it is generating.
|
||||||
|
|
||||||
using CommandLine.Text;
|
|
||||||
using CommandLine;
|
using CommandLine;
|
||||||
|
using CommandLine.Text;
|
||||||
using PortAudioSharp;
|
using PortAudioSharp;
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Threading;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class OfflineTtsPlayDemo
|
class OfflineTtsPlayDemo
|
||||||
{
|
{
|
||||||
@@ -26,13 +23,13 @@ class OfflineTtsPlayDemo
|
|||||||
{
|
{
|
||||||
|
|
||||||
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
|
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
|
||||||
public string RuleFsts { get; set; }
|
public string? RuleFsts { get; set; }
|
||||||
|
|
||||||
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
|
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
|
||||||
public string DictDir { get; set; }
|
public string? DictDir { get; set; }
|
||||||
|
|
||||||
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
|
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
|
||||||
public string DataDir { get; set; }
|
public string? DataDir { get; set; }
|
||||||
|
|
||||||
[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
|
[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
|
||||||
public float LengthScale { get; set; }
|
public float LengthScale { get; set; }
|
||||||
@@ -44,10 +41,10 @@ class OfflineTtsPlayDemo
|
|||||||
public float NoiseScaleW { get; set; }
|
public float NoiseScaleW { get; set; }
|
||||||
|
|
||||||
[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
|
[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
|
||||||
public string Lexicon { get; set; }
|
public string? Lexicon { get; set; }
|
||||||
|
|
||||||
[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
|
[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
|
||||||
public string Tokens { get; set; }
|
public string? Tokens { get; set; }
|
||||||
|
|
||||||
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
|
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
|
||||||
public int MaxNumSentences { get; set; }
|
public int MaxNumSentences { get; set; }
|
||||||
@@ -56,16 +53,16 @@ class OfflineTtsPlayDemo
|
|||||||
public int Debug { get; set; }
|
public int Debug { get; set; }
|
||||||
|
|
||||||
[Option("vits-model", Required = true, HelpText = "Path to VITS model")]
|
[Option("vits-model", Required = true, HelpText = "Path to VITS model")]
|
||||||
public string Model { get; set; }
|
public string? Model { get; set; }
|
||||||
|
|
||||||
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
|
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
|
||||||
public int SpeakerId { get; set; }
|
public int SpeakerId { get; set; }
|
||||||
|
|
||||||
[Option("text", Required = true, HelpText = "Text to synthesize")]
|
[Option("text", Required = true, HelpText = "Text to synthesize")]
|
||||||
public string Text { get; set; }
|
public string? Text { get; set; }
|
||||||
|
|
||||||
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
|
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
|
||||||
public string OutputFilename { get; set; }
|
public string? OutputFilename { get; set; }
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Main(string[] args)
|
static void Main(string[] args)
|
||||||
@@ -124,10 +121,9 @@ to download more models.
|
|||||||
Console.WriteLine(helpText);
|
Console.WriteLine(helpText);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static void Run(Options options)
|
private static void Run(Options options)
|
||||||
{
|
{
|
||||||
OfflineTtsConfig config = new OfflineTtsConfig();
|
var config = new OfflineTtsConfig();
|
||||||
config.Model.Vits.Model = options.Model;
|
config.Model.Vits.Model = options.Model;
|
||||||
config.Model.Vits.Lexicon = options.Lexicon;
|
config.Model.Vits.Lexicon = options.Lexicon;
|
||||||
config.Model.Vits.Tokens = options.Tokens;
|
config.Model.Vits.Tokens = options.Tokens;
|
||||||
@@ -142,10 +138,9 @@ to download more models.
|
|||||||
config.RuleFsts = options.RuleFsts;
|
config.RuleFsts = options.RuleFsts;
|
||||||
config.MaxNumSentences = options.MaxNumSentences;
|
config.MaxNumSentences = options.MaxNumSentences;
|
||||||
|
|
||||||
OfflineTts tts = new OfflineTts(config);
|
var tts = new OfflineTts(config);
|
||||||
float speed = 1.0f / options.LengthScale;
|
var speed = 1.0f / options.LengthScale;
|
||||||
int sid = options.SpeakerId;
|
var sid = options.SpeakerId;
|
||||||
|
|
||||||
|
|
||||||
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
||||||
PortAudio.Initialize();
|
PortAudio.Initialize();
|
||||||
@@ -166,11 +161,11 @@ to download more models.
|
|||||||
Environment.Exit(1);
|
Environment.Exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
|
var info = PortAudio.GetDeviceInfo(deviceIndex);
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
|
Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
|
||||||
|
|
||||||
StreamParameters param = new StreamParameters();
|
var param = new StreamParameters();
|
||||||
param.device = deviceIndex;
|
param.device = deviceIndex;
|
||||||
param.channelCount = 1;
|
param.channelCount = 1;
|
||||||
param.sampleFormat = SampleFormat.Float32;
|
param.sampleFormat = SampleFormat.Float32;
|
||||||
@@ -178,7 +173,7 @@ to download more models.
|
|||||||
param.hostApiSpecificStreamInfo = IntPtr.Zero;
|
param.hostApiSpecificStreamInfo = IntPtr.Zero;
|
||||||
|
|
||||||
// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
|
// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
|
||||||
BlockingCollection<float[]> dataItems = new BlockingCollection<float[]>();
|
var dataItems = new BlockingCollection<float[]>();
|
||||||
|
|
||||||
var MyCallback = (IntPtr samples, int n) =>
|
var MyCallback = (IntPtr samples, int n) =>
|
||||||
{
|
{
|
||||||
@@ -193,9 +188,9 @@ to download more models.
|
|||||||
return 1;
|
return 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool playFinished = false;
|
var playFinished = false;
|
||||||
|
|
||||||
float[] lastSampleArray = null;
|
float[]? lastSampleArray = null;
|
||||||
int lastIndex = 0; // not played
|
int lastIndex = 0; // not played
|
||||||
|
|
||||||
PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
|
PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
|
||||||
@@ -270,10 +265,10 @@ to download more models.
|
|||||||
|
|
||||||
stream.Start();
|
stream.Start();
|
||||||
|
|
||||||
OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback);
|
var callback = new OfflineTtsCallback(MyCallback);
|
||||||
|
|
||||||
OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
|
var audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
|
||||||
bool ok = audio.SaveToWaveFile(options.OutputFilename);
|
var ok = audio.SaveToWaveFile(options.OutputFilename);
|
||||||
|
|
||||||
if (ok)
|
if (ok)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>offline_tts_play</RootNamespace>
|
<RootNamespace>offline_tts_play</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -6,28 +6,25 @@
|
|||||||
// and
|
// and
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||||
// to download pre-trained models
|
// to download pre-trained models
|
||||||
using CommandLine.Text;
|
|
||||||
using CommandLine;
|
using CommandLine;
|
||||||
|
using CommandLine.Text;
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class OfflineTtsDemo
|
class OfflineTtsDemo
|
||||||
{
|
{
|
||||||
class Options
|
class Options
|
||||||
{
|
{
|
||||||
|
|
||||||
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
|
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
|
||||||
public string RuleFsts { get; set; } = "";
|
public string RuleFsts { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
|
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
|
||||||
public string RuleFars { get; set; } = "";
|
public string RuleFars { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
|
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
|
||||||
public string DictDir { get; set; } = "";
|
public string DictDir { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
|
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
|
||||||
public string DataDir { get; set; } = "";
|
public string DataDir { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
|
[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
|
||||||
public float LengthScale { get; set; } = 1;
|
public float LengthScale { get; set; } = 1;
|
||||||
@@ -39,10 +36,10 @@ class OfflineTtsDemo
|
|||||||
public float NoiseScaleW { get; set; } = 0.8F;
|
public float NoiseScaleW { get; set; } = 0.8F;
|
||||||
|
|
||||||
[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
|
[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
|
||||||
public string Lexicon { get; set; } = "";
|
public string Lexicon { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
|
[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
|
||||||
public string Tokens { get; set; } = "";
|
public string Tokens { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
|
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
|
||||||
public int MaxNumSentences { get; set; } = 1;
|
public int MaxNumSentences { get; set; } = 1;
|
||||||
@@ -51,13 +48,13 @@ class OfflineTtsDemo
|
|||||||
public int Debug { get; set; } = 0;
|
public int Debug { get; set; } = 0;
|
||||||
|
|
||||||
[Option("vits-model", Required = true, HelpText = "Path to VITS model")]
|
[Option("vits-model", Required = true, HelpText = "Path to VITS model")]
|
||||||
public string Model { get; set; } = "";
|
public string Model { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
|
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
|
||||||
public int SpeakerId { get; set; } = 0;
|
public int SpeakerId { get; set; } = 0;
|
||||||
|
|
||||||
[Option("text", Required = true, HelpText = "Text to synthesize")]
|
[Option("text", Required = true, HelpText = "Text to synthesize")]
|
||||||
public string Text { get; set; } = "";
|
public string Text { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
|
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
|
||||||
public string OutputFilename { get; set; } = "./generated.wav";
|
public string OutputFilename { get; set; } = "./generated.wav";
|
||||||
@@ -65,7 +62,7 @@ class OfflineTtsDemo
|
|||||||
|
|
||||||
static void Main(string[] args)
|
static void Main(string[] args)
|
||||||
{
|
{
|
||||||
var parser = new CommandLine.Parser(with => with.HelpWriter = null);
|
var parser = new Parser(with => with.HelpWriter = null);
|
||||||
var parserResult = parser.ParseArguments<Options>(args);
|
var parserResult = parser.ParseArguments<Options>(args);
|
||||||
|
|
||||||
parserResult
|
parserResult
|
||||||
@@ -75,7 +72,7 @@ class OfflineTtsDemo
|
|||||||
|
|
||||||
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
||||||
{
|
{
|
||||||
string usage = @"
|
var usage = @"
|
||||||
# vits-aishell3
|
# vits-aishell3
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
||||||
@@ -122,7 +119,7 @@ to download more models.
|
|||||||
|
|
||||||
private static void Run(Options options)
|
private static void Run(Options options)
|
||||||
{
|
{
|
||||||
OfflineTtsConfig config = new OfflineTtsConfig();
|
var config = new OfflineTtsConfig();
|
||||||
config.Model.Vits.Model = options.Model;
|
config.Model.Vits.Model = options.Model;
|
||||||
config.Model.Vits.Lexicon = options.Lexicon;
|
config.Model.Vits.Lexicon = options.Lexicon;
|
||||||
config.Model.Vits.Tokens = options.Tokens;
|
config.Model.Vits.Tokens = options.Tokens;
|
||||||
@@ -138,11 +135,11 @@ to download more models.
|
|||||||
config.RuleFars = options.RuleFars;
|
config.RuleFars = options.RuleFars;
|
||||||
config.MaxNumSentences = options.MaxNumSentences;
|
config.MaxNumSentences = options.MaxNumSentences;
|
||||||
|
|
||||||
OfflineTts tts = new OfflineTts(config);
|
var tts = new OfflineTts(config);
|
||||||
float speed = 1.0f / options.LengthScale;
|
var speed = 1.0f / options.LengthScale;
|
||||||
int sid = options.SpeakerId;
|
var sid = options.SpeakerId;
|
||||||
OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid);
|
var audio = tts.Generate(options.Text, speed, sid);
|
||||||
bool ok = audio.SaveToWaveFile(options.OutputFilename);
|
var ok = audio.SaveToWaveFile(options.OutputFilename);
|
||||||
|
|
||||||
if (ok)
|
if (ok)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>offline_tts</RootNamespace>
|
<RootNamespace>offline_tts</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -6,40 +6,37 @@
|
|||||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
|
||||||
// to download streaming models
|
// to download streaming models
|
||||||
|
|
||||||
using CommandLine.Text;
|
|
||||||
using CommandLine;
|
using CommandLine;
|
||||||
|
using CommandLine.Text;
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Linq;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class OnlineDecodeFiles
|
class OnlineDecodeFiles
|
||||||
{
|
{
|
||||||
class Options
|
class Options
|
||||||
{
|
{
|
||||||
[Option(Required = true, HelpText = "Path to tokens.txt")]
|
[Option(Required = true, HelpText = "Path to tokens.txt")]
|
||||||
public string Tokens { get; set; } = "";
|
public string Tokens { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
||||||
public string Provider { get; set; } = "";
|
public string Provider { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
||||||
public string Encoder { get; set; } = "";
|
public string Encoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
||||||
public string Decoder { get; set; } = "";
|
public string Decoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
||||||
public string Joiner { get; set; } = "";
|
public string Joiner { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
||||||
public string ParaformerEncoder { get; set; } = "";
|
public string ParaformerEncoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
||||||
public string ParaformerDecoder { get; set; } = "";
|
public string ParaformerDecoder { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
|
[Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
|
||||||
public string Zipformer2Ctc { get; set; } = "";
|
public string Zipformer2Ctc { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
||||||
public int NumThreads { get; set; } = 1;
|
public int NumThreads { get; set; } = 1;
|
||||||
@@ -80,15 +77,14 @@ larger than this value. Used only when --enable-endpoint is true.")]
|
|||||||
public float Rule3MinUtteranceLength { get; set; } = 20.0F;
|
public float Rule3MinUtteranceLength { get; set; } = 20.0F;
|
||||||
|
|
||||||
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
|
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
|
||||||
public string HotwordsFile { get; set; } = "";
|
public string HotwordsFile { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
|
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
|
||||||
public float HotwordsScore { get; set; } = 1.5F;
|
public float HotwordsScore { get; set; } = 1.5F;
|
||||||
|
|
||||||
[Option("rule-fsts", Required = false, Default = "",
|
[Option("rule-fsts", Required = false, Default = "",
|
||||||
HelpText = "If not empty, path to rule fst for inverse text normalization")]
|
HelpText = "If not empty, path to rule fst for inverse text normalization")]
|
||||||
public string RuleFsts { get; set; } = "";
|
public string RuleFsts { get; set; } = string.Empty;
|
||||||
|
|
||||||
|
|
||||||
[Option("files", Required = true, HelpText = "Audio files for decoding")]
|
[Option("files", Required = true, HelpText = "Audio files for decoding")]
|
||||||
public IEnumerable<string> Files { get; set; } = new string[] {};
|
public IEnumerable<string> Files { get; set; } = new string[] {};
|
||||||
@@ -162,7 +158,7 @@ to download pre-trained streaming models.
|
|||||||
|
|
||||||
private static void Run(Options options)
|
private static void Run(Options options)
|
||||||
{
|
{
|
||||||
OnlineRecognizerConfig config = new OnlineRecognizerConfig();
|
var config = new OnlineRecognizerConfig();
|
||||||
config.FeatConfig.SampleRate = options.SampleRate;
|
config.FeatConfig.SampleRate = options.SampleRate;
|
||||||
|
|
||||||
// All models from icefall using feature dim 80.
|
// All models from icefall using feature dim 80.
|
||||||
@@ -194,22 +190,22 @@ to download pre-trained streaming models.
|
|||||||
config.HotwordsScore = options.HotwordsScore;
|
config.HotwordsScore = options.HotwordsScore;
|
||||||
config.RuleFsts = options.RuleFsts;
|
config.RuleFsts = options.RuleFsts;
|
||||||
|
|
||||||
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
var recognizer = new OnlineRecognizer(config);
|
||||||
|
|
||||||
string[] files = options.Files.ToArray();
|
var files = options.Files.ToArray();
|
||||||
|
|
||||||
// We create a separate stream for each file
|
// We create a separate stream for each file
|
||||||
List<OnlineStream> streams = new List<OnlineStream>();
|
var streams = new List<OnlineStream>();
|
||||||
streams.EnsureCapacity(files.Length);
|
streams.EnsureCapacity(files.Length);
|
||||||
|
|
||||||
for (int i = 0; i != files.Length; ++i)
|
for (int i = 0; i != files.Length; ++i)
|
||||||
{
|
{
|
||||||
OnlineStream s = recognizer.CreateStream();
|
var s = recognizer.CreateStream();
|
||||||
|
|
||||||
WaveReader waveReader = new WaveReader(files[i]);
|
var waveReader = new WaveReader(files[i]);
|
||||||
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
||||||
|
|
||||||
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
|
var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
|
||||||
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
|
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
|
||||||
s.InputFinished();
|
s.InputFinished();
|
||||||
|
|
||||||
@@ -230,7 +226,7 @@ to download pre-trained streaming models.
|
|||||||
// display results
|
// display results
|
||||||
for (int i = 0; i != files.Length; ++i)
|
for (int i = 0; i != files.Length; ++i)
|
||||||
{
|
{
|
||||||
OnlineRecognizerResult r = recognizer.GetResult(streams[i]);
|
var r = recognizer.GetResult(streams[i]);
|
||||||
var text = r.Text;
|
var text = r.Text;
|
||||||
var tokens = r.Tokens;
|
var tokens = r.Tokens;
|
||||||
Console.WriteLine("--------------------");
|
Console.WriteLine("--------------------");
|
||||||
@@ -238,7 +234,7 @@ to download pre-trained streaming models.
|
|||||||
Console.WriteLine("text: {0}", text);
|
Console.WriteLine("text: {0}", text);
|
||||||
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
|
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
|
||||||
Console.Write("timestamps: [");
|
Console.Write("timestamps: [");
|
||||||
r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
|
r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
|
||||||
Console.WriteLine("]");
|
Console.WriteLine("]");
|
||||||
}
|
}
|
||||||
Console.WriteLine("--------------------");
|
Console.WriteLine("--------------------");
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>online_decode_files</RootNamespace>
|
<RootNamespace>online_decode_files</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -29,9 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
|
||||||
EndProject
|
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
|
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
@@ -91,10 +89,6 @@ Global
|
|||||||
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
|
||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
|
||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
|
||||||
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
|
|
||||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
|||||||
@@ -16,20 +16,18 @@
|
|||||||
// dotnet run
|
// dotnet run
|
||||||
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class SpeakerIdentificationDemo
|
class SpeakerIdentificationDemo
|
||||||
{
|
{
|
||||||
public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
|
public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename)
|
||||||
{
|
{
|
||||||
WaveReader reader = new WaveReader(filename);
|
var reader = new WaveReader(filename);
|
||||||
|
|
||||||
OnlineStream stream = extractor.CreateStream();
|
var stream = extractor.CreateStream();
|
||||||
stream.AcceptWaveform(reader.SampleRate, reader.Samples);
|
stream.AcceptWaveform(reader.SampleRate, reader.Samples);
|
||||||
stream.InputFinished();
|
stream.InputFinished();
|
||||||
|
|
||||||
float[] embedding = extractor.Compute(stream);
|
var embedding = extractor.Compute(stream);
|
||||||
|
|
||||||
return embedding;
|
return embedding;
|
||||||
}
|
}
|
||||||
@@ -43,25 +41,25 @@ class SpeakerIdentificationDemo
|
|||||||
|
|
||||||
var manager = new SpeakerEmbeddingManager(extractor.Dim);
|
var manager = new SpeakerEmbeddingManager(extractor.Dim);
|
||||||
|
|
||||||
string[] spk1Files =
|
var spk1Files =
|
||||||
new string[] {
|
new string[] {
|
||||||
"./sr-data/enroll/fangjun-sr-1.wav",
|
"./sr-data/enroll/fangjun-sr-1.wav",
|
||||||
"./sr-data/enroll/fangjun-sr-2.wav",
|
"./sr-data/enroll/fangjun-sr-2.wav",
|
||||||
"./sr-data/enroll/fangjun-sr-3.wav",
|
"./sr-data/enroll/fangjun-sr-3.wav",
|
||||||
};
|
};
|
||||||
float[][] spk1Vec = new float[spk1Files.Length][];
|
var spk1Vec = new float[spk1Files.Length][];
|
||||||
|
|
||||||
for (int i = 0; i < spk1Files.Length; ++i)
|
for (int i = 0; i < spk1Files.Length; ++i)
|
||||||
{
|
{
|
||||||
spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
|
spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
string[] spk2Files =
|
var spk2Files =
|
||||||
new string[] {
|
new string[] {
|
||||||
"./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
|
"./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
|
||||||
};
|
};
|
||||||
|
|
||||||
float[][] spk2Vec = new float[spk2Files.Length][];
|
var spk2Vec = new float[spk2Files.Length][];
|
||||||
|
|
||||||
for (int i = 0; i < spk2Files.Length; ++i)
|
for (int i = 0; i < spk2Files.Length; ++i)
|
||||||
{
|
{
|
||||||
@@ -100,14 +98,14 @@ class SpeakerIdentificationDemo
|
|||||||
|
|
||||||
Console.WriteLine("---All speakers---");
|
Console.WriteLine("---All speakers---");
|
||||||
|
|
||||||
string[] allSpeakers = manager.GetAllSpeakers();
|
var allSpeakers = manager.GetAllSpeakers();
|
||||||
foreach (var s in allSpeakers)
|
foreach (var s in allSpeakers)
|
||||||
{
|
{
|
||||||
Console.WriteLine(s);
|
Console.WriteLine(s);
|
||||||
}
|
}
|
||||||
Console.WriteLine("------------");
|
Console.WriteLine("------------");
|
||||||
|
|
||||||
string[] testFiles =
|
var testFiles =
|
||||||
new string[] {
|
new string[] {
|
||||||
"./sr-data/test/fangjun-test-sr-1.wav",
|
"./sr-data/test/fangjun-test-sr-1.wav",
|
||||||
"./sr-data/test/leijun-test-sr-1.wav",
|
"./sr-data/test/leijun-test-sr-1.wav",
|
||||||
@@ -117,9 +115,9 @@ class SpeakerIdentificationDemo
|
|||||||
float threshold = 0.6f;
|
float threshold = 0.6f;
|
||||||
foreach (var file in testFiles)
|
foreach (var file in testFiles)
|
||||||
{
|
{
|
||||||
float[] embedding = ComputeEmbedding(extractor, file);
|
var embedding = ComputeEmbedding(extractor, file);
|
||||||
|
|
||||||
String name = manager.Search(embedding, threshold);
|
var name = manager.Search(embedding, threshold);
|
||||||
if (name == "")
|
if (name == "")
|
||||||
{
|
{
|
||||||
name = "<Unknown>";
|
name = "<Unknown>";
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>speaker_identification</RootNamespace>
|
<RootNamespace>speaker_identification</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -6,47 +6,43 @@
|
|||||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
|
||||||
// to download streaming models
|
// to download streaming models
|
||||||
|
|
||||||
using CommandLine.Text;
|
|
||||||
using CommandLine;
|
using CommandLine;
|
||||||
|
using CommandLine.Text;
|
||||||
using PortAudioSharp;
|
using PortAudioSharp;
|
||||||
using System.Threading;
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System;
|
|
||||||
|
|
||||||
|
|
||||||
class SpeechRecognitionFromMicrophone
|
class SpeechRecognitionFromMicrophone
|
||||||
{
|
{
|
||||||
class Options
|
class Options
|
||||||
{
|
{
|
||||||
[Option(Required = true, HelpText = "Path to tokens.txt")]
|
[Option(Required = true, HelpText = "Path to tokens.txt")]
|
||||||
public string Tokens { get; set; }
|
public string? Tokens { get; set; }
|
||||||
|
|
||||||
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
||||||
public string Provider { get; set; }
|
public string? Provider { get; set; }
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
||||||
public string Encoder { get; set; }
|
public string? Encoder { get; set; }
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
||||||
public string Decoder { get; set; }
|
public string? Decoder { get; set; }
|
||||||
|
|
||||||
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
||||||
public string Joiner { get; set; }
|
public string? Joiner { get; set; }
|
||||||
|
|
||||||
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
||||||
public string ParaformerEncoder { get; set; }
|
public string? ParaformerEncoder { get; set; }
|
||||||
|
|
||||||
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
||||||
public string ParaformerDecoder { get; set; }
|
public string? ParaformerDecoder { get; set; }
|
||||||
|
|
||||||
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
||||||
public int NumThreads { get; set; }
|
public int NumThreads { get; set; }
|
||||||
|
|
||||||
[Option("decoding-method", Required = false, Default = "greedy_search",
|
[Option("decoding-method", Required = false, Default = "greedy_search",
|
||||||
HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
|
HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
|
||||||
public string DecodingMethod { get; set; }
|
public string? DecodingMethod { get; set; }
|
||||||
|
|
||||||
[Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
|
[Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
|
||||||
public bool Debug { get; set; }
|
public bool Debug { get; set; }
|
||||||
@@ -126,7 +122,7 @@ to download pre-trained streaming models.
|
|||||||
|
|
||||||
private static void Run(Options options)
|
private static void Run(Options options)
|
||||||
{
|
{
|
||||||
OnlineRecognizerConfig config = new OnlineRecognizerConfig();
|
var config = new OnlineRecognizerConfig();
|
||||||
config.FeatConfig.SampleRate = options.SampleRate;
|
config.FeatConfig.SampleRate = options.SampleRate;
|
||||||
|
|
||||||
// All models from icefall using feature dim 80.
|
// All models from icefall using feature dim 80.
|
||||||
@@ -153,9 +149,9 @@ to download pre-trained streaming models.
|
|||||||
config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
|
config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
|
||||||
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
|
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
|
||||||
|
|
||||||
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
var recognizer = new OnlineRecognizer(config);
|
||||||
|
|
||||||
OnlineStream s = recognizer.CreateStream();
|
var s = recognizer.CreateStream();
|
||||||
|
|
||||||
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
||||||
PortAudio.Initialize();
|
PortAudio.Initialize();
|
||||||
@@ -176,12 +172,12 @@ to download pre-trained streaming models.
|
|||||||
Environment.Exit(1);
|
Environment.Exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
|
var info = PortAudio.GetDeviceInfo(deviceIndex);
|
||||||
|
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
|
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
|
||||||
|
|
||||||
StreamParameters param = new StreamParameters();
|
var param = new StreamParameters();
|
||||||
param.device = deviceIndex;
|
param.device = deviceIndex;
|
||||||
param.channelCount = 1;
|
param.channelCount = 1;
|
||||||
param.sampleFormat = SampleFormat.Float32;
|
param.sampleFormat = SampleFormat.Float32;
|
||||||
@@ -189,14 +185,14 @@ to download pre-trained streaming models.
|
|||||||
param.hostApiSpecificStreamInfo = IntPtr.Zero;
|
param.hostApiSpecificStreamInfo = IntPtr.Zero;
|
||||||
|
|
||||||
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
|
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
|
||||||
UInt32 frameCount,
|
uint frameCount,
|
||||||
ref StreamCallbackTimeInfo timeInfo,
|
ref StreamCallbackTimeInfo timeInfo,
|
||||||
StreamCallbackFlags statusFlags,
|
StreamCallbackFlags statusFlags,
|
||||||
IntPtr userData
|
IntPtr userData
|
||||||
) =>
|
) =>
|
||||||
{
|
{
|
||||||
float[] samples = new float[frameCount];
|
var samples = new float[frameCount];
|
||||||
Marshal.Copy(input, samples, 0, (Int32)frameCount);
|
Marshal.Copy(input, samples, 0, (int)frameCount);
|
||||||
|
|
||||||
s.AcceptWaveform(options.SampleRate, samples);
|
s.AcceptWaveform(options.SampleRate, samples);
|
||||||
|
|
||||||
@@ -215,7 +211,7 @@ to download pre-trained streaming models.
|
|||||||
|
|
||||||
stream.Start();
|
stream.Start();
|
||||||
|
|
||||||
String lastText = "";
|
var lastText = string.Empty;
|
||||||
int segmentIndex = 0;
|
int segmentIndex = 0;
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
@@ -245,9 +241,5 @@ to download pre-trained streaming models.
|
|||||||
|
|
||||||
Thread.Sleep(200); // ms
|
Thread.Sleep(200); // ms
|
||||||
}
|
}
|
||||||
|
|
||||||
PortAudio.Terminate();
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>speech_recognition_from_microphone</RootNamespace>
|
<RootNamespace>speech_recognition_from_microphone</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -15,12 +15,9 @@
|
|||||||
// dotnet run
|
// dotnet run
|
||||||
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class SpokenLanguageIdentificationDemo
|
class SpokenLanguageIdentificationDemo
|
||||||
{
|
{
|
||||||
|
|
||||||
static void Main(string[] args)
|
static void Main(string[] args)
|
||||||
{
|
{
|
||||||
var config = new SpokenLanguageIdentificationConfig();
|
var config = new SpokenLanguageIdentificationConfig();
|
||||||
@@ -30,7 +27,7 @@ class SpokenLanguageIdentificationDemo
|
|||||||
var slid = new SpokenLanguageIdentification(config);
|
var slid = new SpokenLanguageIdentification(config);
|
||||||
var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
|
var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
|
||||||
|
|
||||||
WaveReader waveReader = new WaveReader(filename);
|
var waveReader = new WaveReader(filename);
|
||||||
|
|
||||||
var s = slid.CreateStream();
|
var s = slid.CreateStream();
|
||||||
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>spoken_language_identification</RootNamespace>
|
<RootNamespace>spoken_language_identification</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -13,12 +13,9 @@
|
|||||||
// dotnet run
|
// dotnet run
|
||||||
|
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class StreamingHlgDecodingDemo
|
class StreamingHlgDecodingDemo
|
||||||
{
|
{
|
||||||
|
|
||||||
static void Main(string[] args)
|
static void Main(string[] args)
|
||||||
{
|
{
|
||||||
var config = new OnlineRecognizerConfig();
|
var config = new OnlineRecognizerConfig();
|
||||||
@@ -32,15 +29,15 @@ class StreamingHlgDecodingDemo
|
|||||||
config.ModelConfig.Debug = 0;
|
config.ModelConfig.Debug = 0;
|
||||||
config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
|
config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
|
||||||
|
|
||||||
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
var recognizer = new OnlineRecognizer(config);
|
||||||
|
|
||||||
var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
|
var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
|
||||||
|
|
||||||
WaveReader waveReader = new WaveReader(filename);
|
var waveReader = new WaveReader(filename);
|
||||||
OnlineStream s = recognizer.CreateStream();
|
var s = recognizer.CreateStream();
|
||||||
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
|
||||||
|
|
||||||
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
|
var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
|
||||||
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
|
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
|
||||||
s.InputFinished();
|
s.InputFinished();
|
||||||
|
|
||||||
@@ -49,7 +46,7 @@ class StreamingHlgDecodingDemo
|
|||||||
recognizer.Decode(s);
|
recognizer.Decode(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
OnlineRecognizerResult r = recognizer.GetResult(s);
|
var r = recognizer.GetResult(s);
|
||||||
var text = r.Text;
|
var text = r.Text;
|
||||||
var tokens = r.Tokens;
|
var tokens = r.Tokens;
|
||||||
Console.WriteLine("--------------------");
|
Console.WriteLine("--------------------");
|
||||||
@@ -57,10 +54,8 @@ class StreamingHlgDecodingDemo
|
|||||||
Console.WriteLine("text: {0}", text);
|
Console.WriteLine("text: {0}", text);
|
||||||
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
|
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
|
||||||
Console.Write("timestamps: [");
|
Console.Write("timestamps: [");
|
||||||
r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
|
r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
|
||||||
Console.WriteLine("]");
|
Console.WriteLine("]");
|
||||||
Console.WriteLine("--------------------");
|
Console.WriteLine("--------------------");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>streaming_hlg_decoding</RootNamespace>
|
<RootNamespace>streaming_hlg_decoding</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
@@ -3,8 +3,6 @@
|
|||||||
// This file shows how to use a silero_vad model with a non-streaming Paraformer
|
// This file shows how to use a silero_vad model with a non-streaming Paraformer
|
||||||
// for speech recognition.
|
// for speech recognition.
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System;
|
|
||||||
|
|
||||||
class VadNonStreamingAsrParaformer
|
class VadNonStreamingAsrParaformer
|
||||||
{
|
{
|
||||||
@@ -12,45 +10,49 @@ class VadNonStreamingAsrParaformer
|
|||||||
{
|
{
|
||||||
// please download model files from
|
// please download model files from
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
OfflineRecognizerConfig config = new OfflineRecognizerConfig();
|
var config = new OfflineRecognizerConfig();
|
||||||
config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
|
config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
|
||||||
config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
|
config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
|
||||||
config.ModelConfig.Debug = 0;
|
config.ModelConfig.Debug = 0;
|
||||||
OfflineRecognizer recognizer = new OfflineRecognizer(config);
|
var recognizer = new OfflineRecognizer(config);
|
||||||
|
|
||||||
VadModelConfig vadModelConfig = new VadModelConfig();
|
var vadModelConfig = new VadModelConfig();
|
||||||
vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
|
vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
|
||||||
vadModelConfig.Debug = 0;
|
vadModelConfig.Debug = 0;
|
||||||
|
|
||||||
VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);
|
var vad = new VoiceActivityDetector(vadModelConfig, 60);
|
||||||
|
|
||||||
string testWaveFilename = "./lei-jun-test.wav";
|
var testWaveFilename = "./lei-jun-test.wav";
|
||||||
WaveReader reader = new WaveReader(testWaveFilename);
|
var reader = new WaveReader(testWaveFilename);
|
||||||
|
|
||||||
int numSamples = reader.Samples.Length;
|
int numSamples = reader.Samples.Length;
|
||||||
int windowSize = vadModelConfig.SileroVad.WindowSize;
|
int windowSize = vadModelConfig.SileroVad.WindowSize;
|
||||||
int sampleRate = vadModelConfig.SampleRate;
|
int sampleRate = vadModelConfig.SampleRate;
|
||||||
int numIter = numSamples / windowSize;
|
int numIter = numSamples / windowSize;
|
||||||
|
|
||||||
for (int i = 0; i != numIter; ++i) {
|
for (int i = 0; i != numIter; ++i)
|
||||||
|
{
|
||||||
int start = i * windowSize;
|
int start = i * windowSize;
|
||||||
float[] samples = new float[windowSize];
|
var samples = new float[windowSize];
|
||||||
Array.Copy(reader.Samples, start, samples, 0, windowSize);
|
Array.Copy(reader.Samples, start, samples, 0, windowSize);
|
||||||
vad.AcceptWaveform(samples);
|
vad.AcceptWaveform(samples);
|
||||||
if (vad.IsSpeechDetected()) {
|
if (vad.IsSpeechDetected())
|
||||||
while (!vad.IsEmpty()) {
|
{
|
||||||
|
while (!vad.IsEmpty())
|
||||||
|
{
|
||||||
SpeechSegment segment = vad.Front();
|
SpeechSegment segment = vad.Front();
|
||||||
float startTime = segment.Start / (float)sampleRate;
|
var startTime = segment.Start / (float)sampleRate;
|
||||||
float duration = segment.Samples.Length / (float)sampleRate;
|
var duration = segment.Samples.Length / (float)sampleRate;
|
||||||
|
|
||||||
OfflineStream stream = recognizer.CreateStream();
|
OfflineStream stream = recognizer.CreateStream();
|
||||||
stream.AcceptWaveform(sampleRate, segment.Samples);
|
stream.AcceptWaveform(sampleRate, segment.Samples);
|
||||||
recognizer.Decode(stream);
|
recognizer.Decode(stream);
|
||||||
String text = stream.Result.Text;
|
var text = stream.Result.Text;
|
||||||
|
|
||||||
if (!String.IsNullOrEmpty(text)) {
|
if (!string.IsNullOrEmpty(text))
|
||||||
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
|
{
|
||||||
String.Format("{0:0.00}", startTime+duration), text);
|
Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
|
||||||
|
string.Format("{0:0.00}", startTime + duration), text);
|
||||||
}
|
}
|
||||||
|
|
||||||
vad.Pop();
|
vad.Pop();
|
||||||
@@ -60,19 +62,21 @@ class VadNonStreamingAsrParaformer
|
|||||||
|
|
||||||
vad.Flush();
|
vad.Flush();
|
||||||
|
|
||||||
while (!vad.IsEmpty()) {
|
while (!vad.IsEmpty())
|
||||||
SpeechSegment segment = vad.Front();
|
{
|
||||||
|
var segment = vad.Front();
|
||||||
float startTime = segment.Start / (float)sampleRate;
|
float startTime = segment.Start / (float)sampleRate;
|
||||||
float duration = segment.Samples.Length / (float)sampleRate;
|
float duration = segment.Samples.Length / (float)sampleRate;
|
||||||
|
|
||||||
OfflineStream stream = recognizer.CreateStream();
|
var stream = recognizer.CreateStream();
|
||||||
stream.AcceptWaveform(sampleRate, segment.Samples);
|
stream.AcceptWaveform(sampleRate, segment.Samples);
|
||||||
recognizer.Decode(stream);
|
recognizer.Decode(stream);
|
||||||
String text = stream.Result.Text;
|
var text = stream.Result.Text;
|
||||||
|
|
||||||
if (!String.IsNullOrEmpty(text)) {
|
if (!string.IsNullOrEmpty(text))
|
||||||
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
|
{
|
||||||
String.Format("{0:0.00}", startTime+duration), text);
|
Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
|
||||||
|
string.Format("{0:0.00}", startTime + duration), text);
|
||||||
}
|
}
|
||||||
|
|
||||||
vad.Pop();
|
vad.Pop();
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<OutputType>Exe</OutputType>
|
<OutputType>Exe</OutputType>
|
||||||
<TargetFramework>net6.0</TargetFramework>
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
|
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|||||||
Reference in New Issue
Block a user