Refactor C# code and support building nuget packages for cross-platforms (#144)
This commit is contained in:
13
dotnet-examples/.editorconfig
Normal file
13
dotnet-examples/.editorconfig
Normal file
@@ -0,0 +1,13 @@
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Don't use tabs for indentation.
|
||||
[*]
|
||||
indent_style = space
|
||||
|
||||
# Code files
|
||||
[*.{cs,csx,vb,vbx}]
|
||||
indent_size = 2
|
||||
insert_final_newline = true
|
||||
charset = utf-8-bom
|
||||
end_of_line = crlf
|
||||
2
dotnet-examples/.gitignore
vendored
Normal file
2
dotnet-examples/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
bin
|
||||
obj
|
||||
179
dotnet-examples/offline-decode-files/Program.cs
Normal file
179
dotnet-examples/offline-decode-files/Program.cs
Normal file
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
//
|
||||
// This file shows how to use a non-streaming model to decode files
|
||||
// Please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
// to download non-streaming models
|
||||
using CommandLine.Text;
|
||||
using CommandLine;
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
class OfflineDecodeFiles
|
||||
{
|
||||
// Command-line options of the offline (non-streaming) decoding example.
// Each property is bound to one command-line option by CommandLineParser.
class Options
{
  [Option(Required = false, HelpText = "Path to tokens.txt")]
  public string Tokens { get; set; }

  [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")]
  public string Encoder { get; set; }

  [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")]
  public string Decoder { get; set; }

  [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")]
  public string Joiner { get; set; }

  [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
  public string Paraformer { get; set; }

  [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
  public string NeMoCtc { get; set; }

  [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
  public int NumThreads { get; set; }

  [Option("decoding-method", Required = false, Default = "greedy_search",
          HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
  public string DecodingMethod { get; set; }

  // The help text refers to the option declared just above; it is spelled
  // --decoding-method (single dash between the words).
  [Option("max-active-paths", Required = false, Default = 4,
          HelpText = @"Used only when --decoding-method is modified_beam_search.
It specifies number of active paths to keep during the search")]
  public int MaxActivePaths { get; set; }

  [Option("files", Required = true, HelpText = "Audio files for decoding")]
  public IEnumerable<string> Files { get; set; }
}
|
||||
|
||||
// Entry point: parses the command line, then either runs the recognizer
// (successful parse) or prints the usage text (parse errors).
static void Main(string[] args)
{
  // The parser's own HelpWriter is set to null; DisplayHelp() builds the
  // help text that is shown to the user instead.
  var cliParser = new CommandLine.Parser(settings => settings.HelpWriter = null);
  var parsed = cliParser.ParseArguments<Options>(args);

  parsed.WithParsed<Options>(opts => Run(opts));
  parsed.WithNotParsed(errors => DisplayHelp(parsed, errors));
}
|
||||
|
||||
// Prints usage examples for every supported model type followed by the
// option list / parsing errors generated by CommandLineParser.
//
// result: the (failed) parser result the help text is built from.
// errs:   the parsing errors; not used directly because
//         HelpText.DefaultParsingErrorsHandler reads them from `result`.
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{
  string usage = @"
# Zipformer

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
to download pre-trained non-streaming zipformer models.

# Paraformer

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
to download pre-trained paraformer models

# NeMo CTC

dotnet run \
  --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
to download pre-trained NeMo CTC models
";

  var helpText = HelpText.AutoBuild(result, h =>
  {
    h.AdditionalNewLineAfterOption = false;
    // The usage examples are shown as the heading of the generated help.
    h.Heading = usage;
    h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
    return HelpText.DefaultParsingErrorsHandler(result, h);
  }, e => e);
  Console.WriteLine(helpText);
}
|
||||
|
||||
// Builds an offline recognizer from the parsed options, decodes every file
// given with --files in one batch and prints the recognized text per file.
//
// The model type is chosen from the first non-empty option among
// --encoder (transducer), --paraformer and --nemo-ctc; if none is given a
// message is printed and nothing is decoded.
private static void Run(Options options)
{
  OfflineRecognizerConfig config = new OfflineRecognizerConfig();
  config.ModelConfig.Tokens = options.Tokens;

  if (!String.IsNullOrEmpty(options.Encoder))
  {
    // this is a transducer model
    config.ModelConfig.Transducer.Encoder = options.Encoder;
    config.ModelConfig.Transducer.Decoder = options.Decoder;
    config.ModelConfig.Transducer.Joiner = options.Joiner;
  }
  else if (!String.IsNullOrEmpty(options.Paraformer))
  {
    config.ModelConfig.Paraformer.Model = options.Paraformer;
  }
  else if (!String.IsNullOrEmpty(options.NeMoCtc))
  {
    config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
  }
  else
  {
    Console.WriteLine("Please provide a model");
    return;
  }

  config.DecodingMethod = options.DecodingMethod;
  config.MaxActivePaths = options.MaxActivePaths;
  // Forward --num-threads; without this assignment the option is parsed
  // but has no effect on the recognizer.
  config.ModelConfig.NumThreads = options.NumThreads;
  config.ModelConfig.Debug = 0;

  OfflineRecognizer recognizer = new OfflineRecognizer(config);

  string[] files = options.Files.ToArray();

  // We create a separate stream for each file
  List<OfflineStream> streams = new List<OfflineStream>(files.Length);

  for (int i = 0; i != files.Length; ++i)
  {
    OfflineStream s = recognizer.CreateStream();

    WaveReader waveReader = new WaveReader(files[i]);
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
    streams.Add(s);
  }

  // All streams are decoded in a single batch.
  recognizer.Decode(streams);

  // display results
  for (int i = 0; i != files.Length; ++i)
  {
    var text = streams[i].Result.Text;
    Console.WriteLine("--------------------");
    Console.WriteLine(files[i]);
    Console.WriteLine(text);
  }
  Console.WriteLine("--------------------");
}
|
||||
}
|
||||
1
dotnet-examples/offline-decode-files/WaveReader.cs
Symbolic link
1
dotnet-examples/offline-decode-files/WaveReader.cs
Symbolic link
@@ -0,0 +1 @@
|
||||
../online-decode-files/WaveReader.cs
|
||||
@@ -0,0 +1,16 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>offline_decode_files</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
16
dotnet-examples/offline-decode-files/run-nemo-ctc.sh
Executable file
16
dotnet-examples/offline-decode-files/run-nemo-ctc.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash

# Downloads the NeMo CTC model on first use and decodes its test wave files.
#
# Abort on the first failing command: otherwise a failed `git clone` or `cd`
# would be followed by `git lfs pull`, `cd ..` and `dotnet run` executing
# from the wrong directory.
set -e

if [ ! -d ./sherpa-onnx-nemo-ctc-en-conformer-medium ]; then
  # Clone without LFS payloads, then fetch only the *.onnx model files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
  cd sherpa-onnx-nemo-ctc-en-conformer-medium
  git lfs pull --include "*.onnx"
  cd ..
fi

dotnet run \
  --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav
|
||||
17
dotnet-examples/offline-decode-files/run-paraformer.sh
Executable file
17
dotnet-examples/offline-decode-files/run-paraformer.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash

# Downloads the paraformer model on first use and decodes its test wave files.
#
# Abort on the first failing command: otherwise a failed `git clone` or `cd`
# would be followed by `git lfs pull`, `cd ..` and `dotnet run` executing
# from the wrong directory.
set -e

if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then
  # Clone without LFS payloads, then fetch only the *.onnx model files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
  cd sherpa-onnx-paraformer-zh-2023-03-28
  git lfs pull --include "*.onnx"
  cd ..
fi

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  --num-threads=2 \
  --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
|
||||
19
dotnet-examples/offline-decode-files/run-zipformer.sh
Executable file
19
dotnet-examples/offline-decode-files/run-zipformer.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the zipformer transducer model on first use and decodes its test
# wave files with modified_beam_search.
#
# Abort on the first failing command: otherwise a failed `git clone` or `cd`
# would be followed by `git lfs pull`, `cd ..` and `dotnet run` executing
# from the wrong directory.
set -e

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
  # Clone without LFS payloads, then fetch only the *.onnx model files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01
  cd sherpa-onnx-zipformer-en-2023-04-01
  git lfs pull --include "*.onnx"
  cd ..
fi

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav
|
||||
181
dotnet-examples/online-decode-files/Program.cs
Normal file
181
dotnet-examples/online-decode-files/Program.cs
Normal file
@@ -0,0 +1,181 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
//
|
||||
// This file shows how to use a streaming model to decode files
|
||||
// Please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
|
||||
// to download streaming models
|
||||
|
||||
using CommandLine.Text;
|
||||
using CommandLine;
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System;
|
||||
|
||||
class OnlineDecodeFiles
|
||||
{
|
||||
// Command-line options of the streaming (online) decoding example.
// Each property is bound to one command-line option by CommandLineParser.
class Options
{
  [Option(Required = true, HelpText = "Path to tokens.txt")]
  public string Tokens { get; set; }

  [Option(Required = true, HelpText = "Path to encoder.onnx")]
  public string Encoder { get; set; }

  [Option(Required = true, HelpText = "Path to decoder.onnx")]
  public string Decoder { get; set; }

  [Option(Required = true, HelpText = "Path to joiner.onnx")]
  public string Joiner { get; set; }

  [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
  public int NumThreads { get; set; }

  [Option("decoding-method", Required = false, Default = "greedy_search",
          HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
  public string DecodingMethod { get; set; }

  [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
  public bool Debug { get; set; }

  [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
  public int SampleRate { get; set; }

  // The help text refers to the option declared above; it is spelled
  // --decoding-method (single dash between the words).
  [Option("max-active-paths", Required = false, Default = 4,
          HelpText = @"Used only when --decoding-method is modified_beam_search.
It specifies number of active paths to keep during the search")]
  public int MaxActivePaths { get; set; }

  [Option("enable-endpoint", Required = false, Default = false,
          HelpText = "True to enable endpoint detection.")]
  public bool EnableEndpoint { get; set; }

  [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F,
          HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")]
  public float Rule1MinTrailingSilence { get; set; }

  [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F,
          HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value after something that is not blank has been decoded. Used
only when --enable-endpoint is true.")]
  public float Rule2MinTrailingSilence { get; set; }

  [Option("rule3-min-utterance-length", Required = false, Default = 20.0F,
          HelpText = @"An endpoint is detected if the utterance in seconds is
larger than this value. Used only when --enable-endpoint is true.")]
  public float Rule3MinUtteranceLength { get; set; }

  [Option("files", Required = true, HelpText = "Audio files for decoding")]
  public IEnumerable<string> Files { get; set; }
}
|
||||
|
||||
// Entry point: parses the command line, then either runs the recognizer
// (successful parse) or prints the usage text (parse errors).
static void Main(string[] args)
{
  // The parser's own HelpWriter is set to null; DisplayHelp() builds the
  // help text that is shown to the user instead.
  var cliParser = new CommandLine.Parser(settings => settings.HelpWriter = null);
  var parsed = cliParser.ParseArguments<Options>(args);

  parsed.WithParsed<Options>(opts => Run(opts));
  parsed.WithNotParsed(errors => DisplayHelp(parsed, errors));
}
|
||||
|
||||
// Prints a usage example followed by the option list / parsing errors
// generated by CommandLineParser.
//
// result: the (failed) parser result the help text is built from.
// errs:   the parsing errors; not used directly because
//         HelpText.DefaultParsingErrorsHandler reads them from `result`.
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{
  // Note: the wave files must be introduced with --files, which is declared
  // as a required named option.
  string usage = @"
dotnet run \
  --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --debug=false \
  --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
to download pre-trained streaming models.
";

  var helpText = HelpText.AutoBuild(result, h =>
  {
    h.AdditionalNewLineAfterOption = false;
    // The usage example is shown as the heading of the generated help.
    h.Heading = usage;
    h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
    return HelpText.DefaultParsingErrorsHandler(result, h);
  }, e => e);
  Console.WriteLine(helpText);
}
|
||||
|
||||
// Builds a streaming recognizer from the parsed options, feeds every file
// given with --files into its own stream, decodes until no stream is ready
// any more and prints the recognized text per file.
private static void Run(Options options)
{
  OnlineRecognizerConfig config = new OnlineRecognizerConfig();
  config.FeatConfig.SampleRate = options.SampleRate;

  // All models from icefall using feature dim 80.
  // You can change it if your model has a different feature dim.
  config.FeatConfig.FeatureDim = 80;

  config.TransducerModelConfig.Encoder = options.Encoder;
  config.TransducerModelConfig.Decoder = options.Decoder;
  config.TransducerModelConfig.Joiner = options.Joiner;
  config.TransducerModelConfig.Tokens = options.Tokens;
  config.TransducerModelConfig.NumThreads = options.NumThreads;
  config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;

  config.DecodingMethod = options.DecodingMethod;
  config.MaxActivePaths = options.MaxActivePaths;
  config.EnableEndpoint = options.EnableEndpoint ? 1 : 0;

  config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence;
  config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
  config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;

  OnlineRecognizer recognizer = new OnlineRecognizer(config);

  string[] files = options.Files.ToArray();

  // We create a separate stream for each file
  List<OnlineStream> streams = new List<OnlineStream>(files.Length);

  for (int i = 0; i != files.Length; ++i)
  {
    OnlineStream s = recognizer.CreateStream();

    WaveReader waveReader = new WaveReader(files[i]);
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    // Append 0.3 seconds of silence (all-zero samples) after the audio,
    // then signal that no more input will arrive for this stream.
    float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
    s.InputFinished();

    streams.Add(s);
  }

  while (true)
  {
    // Materialize the query once per iteration. A deferred Where() would be
    // enumerated twice (emptiness check + Decode), calling IsReady() on
    // every stream each time.
    List<OnlineStream> readyStreams = streams.Where(s => recognizer.IsReady(s)).ToList();
    if (readyStreams.Count == 0)
    {
      break;
    }

    recognizer.Decode(readyStreams);
  }

  // display results
  for (int i = 0; i != files.Length; ++i)
  {
    var text = recognizer.GetResult(streams[i]).Text;
    Console.WriteLine("--------------------");
    Console.WriteLine(files[i]);
    Console.WriteLine(text);
  }
  Console.WriteLine("--------------------");
}
|
||||
}
|
||||
174
dotnet-examples/online-decode-files/WaveReader.cs
Normal file
174
dotnet-examples/online-decode-files/WaveReader.cs
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// <summary>
/// In-memory image of the leading bytes of a WAVE file. The fields are laid
/// out sequentially so that the raw bytes read from the file can be
/// marshalled directly into this struct.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct WaveHeader
{
  public Int32 ChunkID;
  public Int32 ChunkSize;
  public Int32 Format;
  public Int32 SubChunk1ID;
  public Int32 SubChunk1Size;
  public Int16 AudioFormat;
  public Int16 NumChannels;
  public Int32 SampleRate;
  public Int32 ByteRate;
  public Int16 BlockAlign;
  public Int16 BitsPerSample;
  public Int32 SubChunk2ID;
  public Int32 SubChunk2Size;

  /// <summary>
  /// Checks that the header describes a format this code accepts: the
  /// expected chunk tags, SubChunk1Size 16, AudioFormat 1, a single channel,
  /// 16 bits per sample, and ByteRate/BlockAlign consistent with the other
  /// fields. SubChunk2ID and SubChunk2Size are not checked here.
  /// </summary>
  /// <returns>
  /// true if every check passes; otherwise false, after writing a message
  /// describing the first failed check to the console.
  /// </returns>
  public bool Validate()
  {
    //                 F F I R
    if (ChunkID != 0x46464952)
    {
      Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
      return false;
    }

    //                E V A W
    if (Format != 0x45564157)
    {
      Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
      return false;
    }

    //                       t m f
    if (SubChunk1ID != 0x20746d66)
    {
      Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
      return false;
    }

    if (SubChunk1Size != 16)
    {
      Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
      return false;
    }

    if (AudioFormat != 1)
    {
      Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
      return false;
    }

    if (NumChannels != 1)
    {
      Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
      return false;
    }

    if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
    {
      Console.WriteLine($"Invalid byte rate: {ByteRate}.");
      return false;
    }

    if (BlockAlign != (NumChannels * BitsPerSample / 8))
    {
      // Report the offending field itself (BlockAlign, not ByteRate).
      Console.WriteLine($"Invalid block align: {BlockAlign}.");
      return false;
    }

    if (BitsPerSample != 16)
    { // we support only 16 bits per sample
      Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
      return false;
    }

    return true;
  }
}
|
||||
|
||||
/// <summary>
/// Loads a WAVE file into memory as normalized float samples.
/// It supports only 16-bit, single channel WAVE format.
/// The sample rate can be any value.
/// </summary>
public class WaveReader
{
  //                                         a t a d
  private const Int32 DataChunkId = 0x61746164;

  /// <summary>
  /// Reads the header and all samples of <paramref name="fileName"/>.
  /// </summary>
  /// <param name="fileName">Path of the wave file to read.</param>
  /// <exception cref="ApplicationException">
  /// The file does not exist, its header is truncated or fails
  /// <see cref="WaveHeader.Validate"/>, or it contains no data chunk.
  /// </exception>
  public WaveReader(String fileName)
  {
    if (!File.Exists(fileName))
    {
      throw new ApplicationException($"{fileName} does not exist!");
    }

    // Open read-only. File.Open(path, FileMode.Open) asks for read/write
    // access and therefore fails on read-only files.
    using (var stream = File.OpenRead(fileName))
    using (var reader = new BinaryReader(stream))
    {
      _header = ReadHeader(reader);

      if (!_header.Validate())
      {
        throw new ApplicationException($"Invalid wave file {fileName}");
      }

      SkipMetaData(reader);

      // now read samples
      // _header.SubChunk2Size contains number of bytes in total.
      // we assume each sample is of type int16
      byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);

      // Use the number of bytes actually read: the file may be shorter than
      // the header claims, and a trailing odd byte is not a whole sample.
      int numSamples = buffer.Length / 2;
      _samples = new float[numSamples];

      for (var i = 0; i < numSamples; ++i)
      {
        // Samples are stored little-endian; assemble them explicitly so the
        // result does not depend on the byte order of the host.
        short sample = (short)(buffer[2 * i] | (buffer[2 * i + 1] << 8));
        _samples[i] = sample / 32768.0F;
      }
    }
  }

  // Reads sizeof(WaveHeader) bytes from the current position and marshals
  // them into a WaveHeader.
  private static WaveHeader ReadHeader(BinaryReader reader)
  {
    int headerSize = Marshal.SizeOf(typeof(WaveHeader));
    byte[] bytes = reader.ReadBytes(headerSize);

    // ReadBytes returns fewer bytes at end of stream; marshalling a short
    // buffer would read past its end.
    if (bytes.Length != headerSize)
    {
      throw new ApplicationException(
          $"Truncated wave header: expected {headerSize} bytes, got {bytes.Length}");
    }

    GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
    try
    {
      return (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
    }
    finally
    {
      // Always unpin the buffer, even if marshalling throws.
      handle.Free();
    }
  }

  // Skips chunks that follow the format chunk until the "data" chunk is
  // found, then stores its ID and size in _header. On return the stream is
  // positioned at the first byte of the sample data.
  private void SkipMetaData(BinaryReader reader)
  {
    var bs = reader.BaseStream;

    Int32 subChunk2ID = _header.SubChunk2ID;
    Int32 subChunk2Size = _header.SubChunk2Size;

    while (subChunk2ID != DataChunkId)
    {
      if (subChunk2Size < 0)
      {
        throw new ApplicationException($"Invalid chunk size: {subChunk2Size}");
      }

      // RIFF chunks are word aligned: an odd-sized chunk is followed by one
      // pad byte that is not counted in its size.
      long next = bs.Position + subChunk2Size + (subChunk2Size & 1);

      // The next chunk header (ID + size) needs 8 more bytes.
      if (next + 8 > bs.Length)
      {
        throw new ApplicationException("No data chunk found in wave file");
      }

      bs.Seek(next, SeekOrigin.Begin);
      subChunk2ID = reader.ReadInt32();
      subChunk2Size = reader.ReadInt32();
    }

    if (subChunk2Size < 0)
    {
      throw new ApplicationException($"Invalid data chunk size: {subChunk2Size}");
    }

    _header.SubChunk2ID = subChunk2ID;
    _header.SubChunk2Size = subChunk2Size;
  }

  private WaveHeader _header;

  // Each 16-bit sample is divided by 32768, so values lie in [-1, 1)
  private float[] _samples;

  /// <summary>Sample rate stored in the file header.</summary>
  public int SampleRate => _header.SampleRate;

  /// <summary>The decoded samples of the file.</summary>
  public float[] Samples => _samples;

  /// <summary>
  /// Reads <paramref name="fileName"/> and prints its number of samples and
  /// its sample rate to the console.
  /// </summary>
  public static void Test(String fileName)
  {
    WaveReader reader = new WaveReader(fileName);
    Console.WriteLine($"samples length: {reader.Samples.Length}");
    Console.WriteLine($"samples rate: {reader.SampleRate}");
  }
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>online_decode_files</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
21
dotnet-examples/online-decode-files/run.sh
Executable file
21
dotnet-examples/online-decode-files/run.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files

# Abort on the first failing command: otherwise a failed `git clone` or `cd`
# would be followed by `git lfs pull`, `cd ..` and `dotnet run` executing
# from the wrong directory.
set -e

if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  # Clone without LFS payloads, then fetch only the *.onnx model files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  git lfs pull --include "*.onnx"
  cd ..
fi

# The last argument must not end with a line-continuation backslash.
dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --decoding-method greedy_search \
  --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
|
||||
28
dotnet-examples/sherpa-onnx.sln
Normal file
28
dotnet-examples/sherpa-onnx.sln
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.0.31903.59
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
Reference in New Issue
Block a user