Refactor .Net example project (#1049)
Co-authored-by: 东风破 <birdfishs@163.com>
This commit is contained in:
3
dotnet-examples/.gitignore
vendored
3
dotnet-examples/.gitignore
vendored
@@ -1,3 +1,6 @@
|
||||
bin
|
||||
obj
|
||||
v17
|
||||
.vs
|
||||
!*.sh
|
||||
*.vsidx
|
||||
|
||||
13
dotnet-examples/Common/Common.csproj
Normal file
13
dotnet-examples/Common/Common.csproj
Normal file
@@ -0,0 +1,13 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.10.1" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -1,174 +1,174 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
// The canonical 44-byte RIFF/WAVE header. The fields are declared in the
// same order and with the same widths as they appear at the start of a
// wave file, so the raw bytes can be marshalled straight into this struct.
[StructLayout(LayoutKind.Sequential)]
public struct WaveHeader
{
    public Int32 ChunkID;
    public Int32 ChunkSize;
    public Int32 Format;
    public Int32 SubChunk1ID;
    public Int32 SubChunk1Size;
    public Int16 AudioFormat;
    public Int16 NumChannels;
    public Int32 SampleRate;
    public Int32 ByteRate;
    public Int16 BlockAlign;
    public Int16 BitsPerSample;
    public Int32 SubChunk2ID;
    public Int32 SubChunk2Size;

    // Checks that the header describes a 16-bit, single channel, PCM wave
    // file. The first failed check is reported on the console.
    //
    // Returns true if every check passes; false otherwise.
    public bool Validate()
    {
        // F F I R
        if (ChunkID != 0x46464952)
        {
            Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
            return false;
        }

        // E V A W
        if (Format != 0x45564157)
        {
            Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
            return false;
        }

        //   t m f
        if (SubChunk1ID != 0x20746d66)
        {
            Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
            return false;
        }

        if (SubChunk1Size != 16)
        {
            Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
            return false;
        }

        // 1 means uncompressed PCM
        if (AudioFormat != 1)
        {
            Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
            return false;
        }

        if (NumChannels != 1)
        {
            Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
            return false;
        }

        // Multiply in 64 bits so that a garbage SampleRate cannot wrap around
        // and accidentally match ByteRate.
        long expectedByteRate = (long)SampleRate * NumChannels * BitsPerSample / 8;
        if (ByteRate != expectedByteRate)
        {
            Console.WriteLine($"Invalid byte rate: {ByteRate}.");
            return false;
        }

        if (BlockAlign != (NumChannels * BitsPerSample / 8))
        {
            // Report the field that actually failed the check
            Console.WriteLine($"Invalid block align: {BlockAlign}.");
            return false;
        }

        if (BitsPerSample != 16)
        { // we support only 16 bits per sample
            Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
            return false;
        }

        return true;
    }
}
|
||||
|
||||
// Loads a wave file into memory as floating point samples.
//
// It supports only 16-bit, single channel WAVE format.
// The sample rate can be any value.
public class WaveReader
{
    // "data" read as a little-endian Int32. It identifies the chunk that
    // holds the audio samples.
    private const Int32 DataChunkID = 0x61746164;

    // Reads the whole file and converts its samples to floats.
    //
    // Throws ApplicationException if the file does not exist, is shorter than
    // a wave header, fails WaveHeader.Validate(), or has no usable data chunk.
    public WaveReader(String fileName)
    {
        if (!File.Exists(fileName))
        {
            throw new ApplicationException($"{fileName} does not exist!");
        }

        // Read-only access is all we need. File.Open(fileName, FileMode.Open)
        // also asks for write access and therefore fails on read-only files.
        using (var stream = File.OpenRead(fileName))
        using (var reader = new BinaryReader(stream))
        {
            _header = ReadHeader(reader);

            if (!_header.Validate())
            {
                throw new ApplicationException($"Invalid wave file {fileName}");
            }

            SkipMetaData(reader);

            // _header.SubChunk2Size contains the number of bytes in total.
            if (_header.SubChunk2Size < 0)
            {
                throw new ApplicationException(
                    $"Invalid wave file {fileName}: data chunk size is {_header.SubChunk2Size}");
            }

            // ReadBytes returns fewer bytes than requested when the file is
            // truncated, so size everything from what was actually read.
            byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);

            // Each sample is an int16; a trailing odd byte is ignored.
            short[] samplesInt16 = new short[buffer.Length / 2];
            Buffer.BlockCopy(buffer, 0, samplesInt16, 0, samplesInt16.Length * 2);

            _samples = new float[samplesInt16.Length];
            for (var i = 0; i < samplesInt16.Length; ++i)
            {
                _samples[i] = samplesInt16[i] / 32768.0F;
            }
        }
    }

    // Reads the fixed-size header from the current position of the reader.
    //
    // Throws ApplicationException if fewer bytes than a full header remain.
    private static WaveHeader ReadHeader(BinaryReader reader)
    {
        int headerSize = Marshal.SizeOf(typeof(WaveHeader));
        byte[] bytes = reader.ReadBytes(headerSize);

        // Marshalling a short buffer would read past the end of the array.
        if (bytes.Length != headerSize)
        {
            throw new ApplicationException(
                $"Invalid wave file: expected a header of {headerSize} bytes, got {bytes.Length}");
        }

        GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
        try
        {
            return (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
        }
        finally
        {
            // Always unpin, even if marshalling throws
            handle.Free();
        }
    }

    // Skips every chunk that precedes the data chunk and stores the ID and
    // size of the data chunk in _header. On return the reader is positioned
    // at the first sample.
    //
    // Throws ApplicationException if the file ends before a data chunk.
    private void SkipMetaData(BinaryReader reader)
    {
        var bs = reader.BaseStream;

        Int32 subChunk2ID = _header.SubChunk2ID;
        Int32 subChunk2Size = _header.SubChunk2Size;

        while (subChunk2ID != DataChunkID)
        {
            // After the payload of this chunk we need 8 more bytes for the
            // ID and the size of the next chunk.
            long nextChunk = bs.Position + subChunk2Size;
            if (subChunk2Size < 0 || nextChunk + 8 > bs.Length)
            {
                throw new ApplicationException("Invalid wave file: no data chunk found");
            }

            bs.Seek(subChunk2Size, SeekOrigin.Current);
            subChunk2ID = reader.ReadInt32();
            subChunk2Size = reader.ReadInt32();
        }

        _header.SubChunk2ID = subChunk2ID;
        _header.SubChunk2Size = subChunk2Size;
    }

    private WaveHeader _header;

    // Samples are normalized to the range [-1, 1]
    private readonly float[] _samples;

    public int SampleRate => _header.SampleRate;

    public float[] Samples => _samples;

    // Loads fileName and prints its number of samples and its sample rate.
    public static void Test(String fileName)
    {
        WaveReader reader = new WaveReader(fileName);
        Console.WriteLine($"samples length: {reader.Samples.Length}");
        Console.WriteLine($"samples rate: {reader.SampleRate}");
    }
}
|
||||
|
||||
}
|
||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
// The canonical 44-byte RIFF/WAVE header. The fields are declared in the
// same order and with the same widths as they appear at the start of a
// wave file, so the raw bytes can be marshalled straight into this struct.
[StructLayout(LayoutKind.Sequential)]
public struct WaveHeader
{
    public Int32 ChunkID;
    public Int32 ChunkSize;
    public Int32 Format;
    public Int32 SubChunk1ID;
    public Int32 SubChunk1Size;
    public Int16 AudioFormat;
    public Int16 NumChannels;
    public Int32 SampleRate;
    public Int32 ByteRate;
    public Int16 BlockAlign;
    public Int16 BitsPerSample;
    public Int32 SubChunk2ID;
    public Int32 SubChunk2Size;

    // Checks that the header describes a 16-bit, single channel, PCM wave
    // file. The first failed check is reported on the console.
    //
    // Returns true if every check passes; false otherwise.
    public bool Validate()
    {
        // F F I R
        if (ChunkID != 0x46464952)
        {
            Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
            return false;
        }

        // E V A W
        if (Format != 0x45564157)
        {
            Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
            return false;
        }

        //   t m f
        if (SubChunk1ID != 0x20746d66)
        {
            Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
            return false;
        }

        if (SubChunk1Size != 16)
        {
            Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
            return false;
        }

        // 1 means uncompressed PCM
        if (AudioFormat != 1)
        {
            Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
            return false;
        }

        if (NumChannels != 1)
        {
            Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
            return false;
        }

        // Multiply in 64 bits so that a garbage SampleRate cannot wrap around
        // and accidentally match ByteRate.
        long expectedByteRate = (long)SampleRate * NumChannels * BitsPerSample / 8;
        if (ByteRate != expectedByteRate)
        {
            Console.WriteLine($"Invalid byte rate: {ByteRate}.");
            return false;
        }

        if (BlockAlign != (NumChannels * BitsPerSample / 8))
        {
            // Report the field that actually failed the check
            Console.WriteLine($"Invalid block align: {BlockAlign}.");
            return false;
        }

        if (BitsPerSample != 16)
        { // we support only 16 bits per sample
            Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
            return false;
        }

        return true;
    }
}
|
||||
|
||||
// Loads a wave file into memory as floating point samples.
//
// It supports only 16-bit, single channel WAVE format.
// The sample rate can be any value.
public class WaveReader
{
    // "data" read as a little-endian Int32. It identifies the chunk that
    // holds the audio samples.
    private const Int32 DataChunkID = 0x61746164;

    // Reads the whole file and converts its samples to floats.
    //
    // Throws ApplicationException if the file does not exist, is shorter than
    // a wave header, fails WaveHeader.Validate(), or has no usable data chunk.
    public WaveReader(String fileName)
    {
        if (!File.Exists(fileName))
        {
            throw new ApplicationException($"{fileName} does not exist!");
        }

        // Read-only access is all we need. File.Open(fileName, FileMode.Open)
        // also asks for write access and therefore fails on read-only files.
        using (var stream = File.OpenRead(fileName))
        using (var reader = new BinaryReader(stream))
        {
            _header = ReadHeader(reader);

            if (!_header.Validate())
            {
                throw new ApplicationException($"Invalid wave file {fileName}");
            }

            SkipMetaData(reader);

            // _header.SubChunk2Size contains the number of bytes in total.
            if (_header.SubChunk2Size < 0)
            {
                throw new ApplicationException(
                    $"Invalid wave file {fileName}: data chunk size is {_header.SubChunk2Size}");
            }

            // ReadBytes returns fewer bytes than requested when the file is
            // truncated, so size everything from what was actually read.
            byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);

            // Each sample is an int16; a trailing odd byte is ignored.
            short[] samplesInt16 = new short[buffer.Length / 2];
            Buffer.BlockCopy(buffer, 0, samplesInt16, 0, samplesInt16.Length * 2);

            _samples = new float[samplesInt16.Length];
            for (var i = 0; i < samplesInt16.Length; ++i)
            {
                _samples[i] = samplesInt16[i] / 32768.0F;
            }
        }
    }

    // Reads the fixed-size header from the current position of the reader.
    //
    // Throws ApplicationException if fewer bytes than a full header remain.
    private static WaveHeader ReadHeader(BinaryReader reader)
    {
        int headerSize = Marshal.SizeOf(typeof(WaveHeader));
        byte[] bytes = reader.ReadBytes(headerSize);

        // Marshalling a short buffer would read past the end of the array.
        if (bytes.Length != headerSize)
        {
            throw new ApplicationException(
                $"Invalid wave file: expected a header of {headerSize} bytes, got {bytes.Length}");
        }

        GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
        try
        {
            return (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
        }
        finally
        {
            // Always unpin, even if marshalling throws
            handle.Free();
        }
    }

    // Skips every chunk that precedes the data chunk and stores the ID and
    // size of the data chunk in _header. On return the reader is positioned
    // at the first sample.
    //
    // Throws ApplicationException if the file ends before a data chunk.
    private void SkipMetaData(BinaryReader reader)
    {
        var bs = reader.BaseStream;

        Int32 subChunk2ID = _header.SubChunk2ID;
        Int32 subChunk2Size = _header.SubChunk2Size;

        while (subChunk2ID != DataChunkID)
        {
            // After the payload of this chunk we need 8 more bytes for the
            // ID and the size of the next chunk.
            long nextChunk = bs.Position + subChunk2Size;
            if (subChunk2Size < 0 || nextChunk + 8 > bs.Length)
            {
                throw new ApplicationException("Invalid wave file: no data chunk found");
            }

            bs.Seek(subChunk2Size, SeekOrigin.Current);
            subChunk2ID = reader.ReadInt32();
            subChunk2Size = reader.ReadInt32();
        }

        _header.SubChunk2ID = subChunk2ID;
        _header.SubChunk2Size = subChunk2Size;
    }

    private WaveHeader _header;

    // Samples are normalized to the range [-1, 1]
    private readonly float[] _samples;

    public int SampleRate => _header.SampleRate;

    public float[] Samples => _samples;

    // Loads fileName and prints its number of samples and its sample rate.
    public static void Test(String fileName)
    {
        WaveReader reader = new WaveReader(fileName);
        Console.WriteLine($"samples length: {reader.Samples.Length}");
        Console.WriteLine($"samples rate: {reader.SampleRate}");
    }
}
|
||||
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
../online-decode-files/WaveReader.cs
|
||||
@@ -9,8 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -1,43 +1,43 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to add punctuations to text.
|
||||
//
|
||||
// 1. Download a model from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
//
|
||||
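// 2. Unpack it
//
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// 3. Now run it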
|
||||
//
|
||||
// dotnet run
|
||||
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
// Feeds a few unpunctuated sentences to an offline punctuation model and
// prints every sentence next to the punctuated text returned for it.
class OfflinePunctuationDemo
{
    static void Main(string[] args)
    {
        const string separator = "---------";

        var punctConfig = new OfflinePunctuationConfig();
        punctConfig.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
        punctConfig.Model.Debug = 1;
        punctConfig.Model.NumThreads = 1;
        var punctuator = new OfflinePunctuation(punctConfig);

        // Chinese, mixed Chinese/English and English input
        string[] sentences =
        {
            "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
            "我们都是木头人不会说话不会动",
            "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
        };

        Console.WriteLine(separator);
        for (int i = 0; i < sentences.Length; ++i)
        {
            string raw = sentences[i];
            string punctuated = punctuator.AddPunct(raw);

            Console.WriteLine($"Input text: {raw}");
            Console.WriteLine($"Output text: {punctuated}");
            Console.WriteLine(separator);
        }
    }
}
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to add punctuations to text.
|
||||
//
|
||||
// 1. Download a model from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
//
|
||||
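// 2. Unpack it
//
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// 3. Now run it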
|
||||
//
|
||||
// dotnet run
|
||||
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
// Feeds a few unpunctuated sentences to an offline punctuation model and
// prints every sentence next to the punctuated text returned for it.
class OfflinePunctuationDemo
{
    static void Main(string[] args)
    {
        const string separator = "---------";

        var punctConfig = new OfflinePunctuationConfig();
        punctConfig.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
        punctConfig.Model.Debug = 1;
        punctConfig.Model.NumThreads = 1;
        var punctuator = new OfflinePunctuation(punctConfig);

        // Chinese, mixed Chinese/English and English input
        string[] sentences =
        {
            "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
            "我们都是木头人不会说话不会动",
            "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
        };

        Console.WriteLine(separator);
        for (int i = 0; i < sentences.Length; ++i)
        {
            string raw = sentences[i];
            string punctuated = punctuator.AddPunct(raw);

            Console.WriteLine($"Input text: {raw}");
            Console.WriteLine($"Output text: {punctuated}");
            Console.WriteLine(separator);
        }
    }
}
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>offline_punctuation</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>offline_punctuation</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
0
dotnet-examples/offline-punctuation/run.sh
Executable file → Normal file
0
dotnet-examples/offline-punctuation/run.sh
Executable file → Normal file
@@ -9,9 +9,11 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<PackageReference Include="PortAudioSharp2" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -3,34 +3,33 @@ Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.0.31903.59
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-recognition-from-microphone", "speech-recognition-from-microphone\speech-recognition-from-microphone.csproj", "{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "speech-recognition-from-microphone", "speech-recognition-from-microphone\speech-recognition-from-microphone.csproj", "{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\offline-tts.csproj", "{72196886-7143-4043-96E2-BCACEC6C79EB}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-tts", "offline-tts\offline-tts.csproj", "{72196886-7143-4043-96E2-BCACEC6C79EB}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
@@ -72,5 +71,15 @@ Global
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {07A6023C-0A37-4F82-A29F-896A3A338EAC}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../offline-decode-files/WaveReader.cs
|
||||
@@ -9,7 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -9,9 +9,11 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<PackageReference Include="PortAudioSharp2" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../offline-decode-files/WaveReader.cs
|
||||
@@ -9,7 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../online-decode-files/WaveReader.cs
|
||||
@@ -9,7 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -1,62 +1,62 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to use a silero_vad model with a non-streaming Paraformer
|
||||
// for speech recognition.
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
// Runs a silero_vad model over a wave file window by window and decodes
// every speech segment it yields with a non-streaming Paraformer recognizer.
// Each non-empty result is printed as "start--end: text".
class VadNonStreamingAsrParaformer
{
    static void Main(string[] args)
    {
        // please download model files from
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
        var asrConfig = new OfflineRecognizerConfig();
        asrConfig.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
        asrConfig.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
        asrConfig.ModelConfig.Debug = 0;
        var recognizer = new OfflineRecognizer(asrConfig);

        var vadConfig = new VadModelConfig();
        vadConfig.SileroVad.Model = "./silero_vad.onnx";
        vadConfig.Debug = 0;

        // NOTE(review): the meaning of the second argument (60) is not
        // visible here - confirm against VoiceActivityDetector.
        var vad = new VoiceActivityDetector(vadConfig, 60);

        var wave = new WaveReader("./lei-jun-test.wav");

        int totalSamples = wave.Samples.Length;
        int windowSize = vadConfig.SileroVad.WindowSize;

        // NOTE(review): times and decoding use the sample rate of the VAD
        // config, not wave.SampleRate - presumably the test file matches it.
        int sampleRate = vadConfig.SampleRate;

        // Only whole windows are processed; samples left over at the end of
        // the file are not fed to the VAD.
        int numWindows = totalSamples / windowSize;

        for (int w = 0; w != numWindows; ++w)
        {
            var window = new float[windowSize];
            Array.Copy(wave.Samples, w * windowSize, window, 0, windowSize);
            vad.AcceptWaveform(window);

            if (!vad.IsSpeechDetected())
            {
                continue;
            }

            // Decode and remove every segment that is currently queued
            for (; !vad.IsEmpty(); vad.Pop())
            {
                SpeechSegment segment = vad.Front();
                float startTime = segment.Start / (float)sampleRate;
                float duration = segment.Samples.Length / (float)sampleRate;

                OfflineStream stream = recognizer.CreateStream();
                stream.AcceptWaveform(sampleRate, segment.Samples);
                recognizer.Decode(stream);
                String text = stream.Result.Text;

                if (!String.IsNullOrEmpty(text))
                {
                    Console.WriteLine($"{startTime:0.00}--{startTime + duration:0.00}: {text}");
                }
            }
        }
    }
}
|
||||
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to use a silero_vad model with a non-streaming Paraformer
|
||||
// for speech recognition.
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
// Runs a silero_vad model over a wave file window by window and decodes
// every speech segment it yields with a non-streaming Paraformer recognizer.
// Each non-empty result is printed as "start--end: text".
class VadNonStreamingAsrParaformer
{
    static void Main(string[] args)
    {
        // please download model files from
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
        var asrConfig = new OfflineRecognizerConfig();
        asrConfig.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
        asrConfig.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
        asrConfig.ModelConfig.Debug = 0;
        var recognizer = new OfflineRecognizer(asrConfig);

        var vadConfig = new VadModelConfig();
        vadConfig.SileroVad.Model = "./silero_vad.onnx";
        vadConfig.Debug = 0;

        // NOTE(review): the meaning of the second argument (60) is not
        // visible here - confirm against VoiceActivityDetector.
        var vad = new VoiceActivityDetector(vadConfig, 60);

        var wave = new WaveReader("./lei-jun-test.wav");

        int totalSamples = wave.Samples.Length;
        int windowSize = vadConfig.SileroVad.WindowSize;

        // NOTE(review): times and decoding use the sample rate of the VAD
        // config, not wave.SampleRate - presumably the test file matches it.
        int sampleRate = vadConfig.SampleRate;

        // Only whole windows are processed; samples left over at the end of
        // the file are not fed to the VAD.
        int numWindows = totalSamples / windowSize;

        for (int w = 0; w != numWindows; ++w)
        {
            var window = new float[windowSize];
            Array.Copy(wave.Samples, w * windowSize, window, 0, windowSize);
            vad.AcceptWaveform(window);

            if (!vad.IsSpeechDetected())
            {
                continue;
            }

            // Decode and remove every segment that is currently queued
            for (; !vad.IsEmpty(); vad.Pop())
            {
                SpeechSegment segment = vad.Front();
                float startTime = segment.Start / (float)sampleRate;
                float duration = segment.Samples.Length / (float)sampleRate;

                OfflineStream stream = recognizer.CreateStream();
                stream.AcceptWaveform(sampleRate, segment.Samples);
                recognizer.Decode(stream);
                String text = stream.Result.Text;

                if (!String.IsNullOrEmpty(text))
                {
                    Console.WriteLine($"{startTime:0.00}--{startTime + duration:0.00}: {text}");
                }
            }
        }
    }
}
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../online-decode-files/WaveReader.cs
|
||||
0
dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
Executable file → Normal file
0
dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
Executable file → Normal file
@@ -1,15 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
Reference in New Issue
Block a user