Add a C# api for offline-recognizer of sherpa-onnx (#129)
This commit is contained in:
255
csharp-api-examples/OfflineDecodeFiles.cs
Normal file
255
csharp-api-examples/OfflineDecodeFiles.cs
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
// See https://aka.ms/new-console-template for more information
|
||||||
|
// Copyright (c) 2023 by manyeyes
|
||||||
|
using SherpaOnnx;
|
||||||
|
/// Please refer to
|
||||||
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
|
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||||
|
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||||
|
/// from there.
|
||||||
|
|
||||||
|
/// download model eg:
|
||||||
|
/// (The directory where the application runs)
|
||||||
|
/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
|
||||||
|
/// cd /path/to
|
||||||
|
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01
|
||||||
|
/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example
|
||||||
|
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512
|
||||||
|
|
||||||
|
/// NuGet for sherpa-onnx
|
||||||
|
/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
|
||||||
|
/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||||
|
|
||||||
|
// transducer Usage:
|
||||||
|
/*
|
||||||
|
.\SherpaOnnx.Examples.exe `
|
||||||
|
--tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
|
||||||
|
--encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
|
||||||
|
--decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
|
||||||
|
--joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
|
||||||
|
--num-threads=2 `
|
||||||
|
--decoding-method=greedy_search `
|
||||||
|
--debug=false `
|
||||||
|
./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav
|
||||||
|
*/
|
||||||
|
|
||||||
|
// paraformer Usage:
|
||||||
|
/*
|
||||||
|
.\SherpaOnnx.Examples.exe `
|
||||||
|
--tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
|
||||||
|
--paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
|
||||||
|
--num-threads=2 `
|
||||||
|
--decoding-method=greedy_search `
|
||||||
|
--debug=false `
|
||||||
|
./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav
|
||||||
|
*/
|
||||||
|
|
||||||
|
// nemo_ctc Usage:
/*
 .\SherpaOnnx.Examples.exe `
  --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `
  --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `
  --num-threads=2 `
  --decoding-method=greedy_search `
  --debug=false `
  ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav
*/
|
||||||
|
|
||||||
|
|
||||||
|
internal class OfflineDecodeFiles
|
||||||
|
{
|
||||||
|
/// <summary>
/// Decodes one or more wave files in a single batch with an offline model.
/// The model family is picked from the options that are present: a transducer
/// (encoder + decoder + joiner) is preferred, then paraformer, then nemo_ctc.
/// Prints the recognized text per file followed by timing statistics.
/// </summary>
/// <param name="args">
/// Options of the form <c>--name=value</c> plus positional wave file paths.
/// When empty, the options are read interactively from standard input.
/// </param>
static void Main(string[] args)
{
    string usage = @"
-----------------------------
transducer Usage:
--tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
--encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
--decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
--joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
--num-threads=2 `
--decoding-method=greedy_search `
--debug=false `
./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav

paraformer Usage:
--tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
--paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
--num-threads=2 `
--decoding-method=greedy_search `
--debug=false `
./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav

nemo Usage:
--tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `
--nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `
--num-threads=2 `
--decoding-method=greedy_search `
--debug=false `
./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav
-----------------------------
";
    if (args.Length == 0)
    {
        Console.WriteLine("Please enter the correct parameters:");
        Console.WriteLine(usage);

        // Read one token per line until a blank line or end of input.
        // ReadLine-only input keeps the first character of every line intact,
        // works when stdin is redirected, and does not depend on the
        // platform's newline sequence.
        List<string> typedArgs = new List<string>();
        while (true)
        {
            string? line = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(line))
            {
                break;
            }
            typedArgs.Add(line);
        }
        args = typedArgs.ToArray();
    }

    Console.WriteLine("Started!\n");
    string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
    List<string> wavFiles = new List<string>();
    Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

    // Missing options resolve to "" so the emptiness checks below can be used uniformly.
    string Option(string key) =>
        argsDict.TryGetValue(key, out string? value) ? value : "";
    string PathOption(string key) =>
        argsDict.TryGetValue(key, out string? value) ? Path.Combine(applicationBase, value) : "";

    string encoder = PathOption("encoder");
    string decoder = PathOption("decoder");
    string joiner = PathOption("joiner");
    string paraformer = PathOption("paraformer");
    string nemo_ctc = PathOption("nemo_ctc");
    string tokens = PathOption("tokens");
    string num_threads = Option("num_threads");
    string decodingMethod = Option("decoding_method");

    // Unparsable or missing values leave the defaults 0 / false.
    int.TryParse(num_threads, out int numThreads);
    bool.TryParse(Option("debug"), out bool isDebug);

    bool hasTransducer = !(string.IsNullOrEmpty(encoder)
        || string.IsNullOrEmpty(decoder)
        || string.IsNullOrEmpty(joiner));

    if (!hasTransducer && string.IsNullOrEmpty(paraformer) && string.IsNullOrEmpty(nemo_ctc))
    {
        Console.WriteLine("Please specify at least one model");
        Console.WriteLine(usage);
        // Nothing can be decoded without a model; stop instead of printing a NaN RTF.
        return;
    }

    if (wavFiles.Count == 0)
    {
        Console.WriteLine("Please specify at least one wave file");
        Console.WriteLine(usage);
        return;
    }

    // Loading is identical for every model family, so do it once up front.
    // NOTE(review): GetFileSamples presumably reports the clip length through `duration` - confirm in AudioHelper.
    TimeSpan total_duration = TimeSpan.Zero;
    List<float[]> samplesList = new List<float[]>(wavFiles.Count);
    foreach (string wavFile in wavFiles)
    {
        TimeSpan duration = TimeSpan.Zero;
        samplesList.Add(AudioHelper.GetFileSamples(wavFile, ref duration));
        total_duration += duration;
    }

    // batch decode
    // Stopwatch is monotonic; DateTime.Now can jump when the system clock is adjusted.
    System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
    List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();
    if (hasTransducer)
    {
        OfflineTransducer offlineTransducer = new OfflineTransducer();
        offlineTransducer.EncoderFilename = encoder;
        offlineTransducer.DecoderFilename = decoder;
        offlineTransducer.JoinerFilename = joiner;

        OfflineRecognizer<OfflineTransducer> offlineRecognizer = new OfflineRecognizer<OfflineTransducer>(
            offlineTransducer,
            tokens,
            num_threads: numThreads,
            debug: isDebug,
            decoding_method: decodingMethod);
        OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
        stopwatch.Start();
        offlineRecognizer.DecodeMultipleOfflineStreams(streams);
        results = offlineRecognizer.GetResults(streams);
        stopwatch.Stop();
    }
    else if (!string.IsNullOrEmpty(paraformer))
    {
        OfflineParaformer offlineParaformer = new OfflineParaformer();
        offlineParaformer.Model = paraformer;

        OfflineRecognizer<OfflineParaformer> offlineRecognizer = new OfflineRecognizer<OfflineParaformer>(
            offlineParaformer,
            tokens,
            num_threads: numThreads,
            debug: isDebug,
            decoding_method: decodingMethod);
        OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
        stopwatch.Start();
        offlineRecognizer.DecodeMultipleOfflineStreams(streams);
        results = offlineRecognizer.GetResults(streams);
        stopwatch.Stop();
    }
    else if (!string.IsNullOrEmpty(nemo_ctc))
    {
        OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
        offlineNemoEncDecCtc.Model = nemo_ctc;

        OfflineRecognizer<OfflineNemoEncDecCtc> offlineRecognizer = new OfflineRecognizer<OfflineNemoEncDecCtc>(
            offlineNemoEncDecCtc,
            tokens,
            num_threads: numThreads,
            debug: isDebug,
            decoding_method: decodingMethod);
        OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
        stopwatch.Start();
        offlineRecognizer.DecodeMultipleOfflineStreams(streams);
        results = offlineRecognizer.GetResults(streams);
        stopwatch.Stop();
    }

    foreach (var item in results.Zip<OfflineRecognizerResultEntity, string>(wavFiles))
    {
        Console.WriteLine("wavFile:{0}", item.Second);
        Console.WriteLine("text:{0}", item.First.text.ToLower());
        Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
    }

    // Real time factor = time spent decoding / total length of the decoded audio.
    double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
    double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
    Console.WriteLine("num_threads:{0}", num_threads);
    Console.WriteLine("decoding_method:{0}", decodingMethod);
    Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
    Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
    Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

    Console.WriteLine("End!");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Splits raw command-line tokens into named options and positional wave file paths.
/// </summary>
/// <param name="args">
/// Raw tokens such as <c>--num-threads=2 `</c> (an option) or
/// <c>./test_wavs/0.wav</c> (a token without '=' is treated as a wave file).
/// </param>
/// <param name="applicationBase">Directory each positional path is combined with.</param>
/// <param name="wavFiles">Receives one combined path per positional token.</param>
/// <returns>
/// Option values keyed by option name, with surrounding dashes removed and
/// inner '-' replaced by '_' (<c>--num-threads</c> becomes <c>num_threads</c>).
/// When an option is repeated, the last occurrence wins.
/// </returns>
static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
{
    // Stripped around option names and positional paths: the "--" prefix,
    // PowerShell's trailing line-continuation backtick, and padding whitespace.
    char[] nameTrimChars = { '-', '`', ' ', '\t', '\r', '\n' };
    // Values keep their hyphens so that "--num-threads=-1" is not turned into "1".
    char[] valueTrimChars = { '`', ' ', '\t', '\r', '\n' };

    Dictionary<string, string> argsDict = new Dictionary<string, string>();
    foreach (string input in args)
    {
        if (string.IsNullOrEmpty(input))
        {
            continue;
        }

        // Split on the FIRST '=' only, so values that contain '=' stay intact.
        int separator = input.IndexOf('=');
        if (separator < 0)
        {
            string path = input.Trim(nameTrimChars);
            // A token that is only padding or a backtick must not be registered as a file.
            if (path.Length > 0)
            {
                wavFiles.Add(Path.Combine(applicationBase, path));
            }
            continue;
        }

        string name = input.Substring(0, separator).Trim(nameTrimChars).Replace("-", "_");
        string value = input.Substring(separator + 1).Trim(valueTrimChars);
        // Indexer assignment instead of Add: a repeated option overrides, it does not throw.
        argsDict[name] = value;
    }
    return argsDict;
}
|
||||||
|
}
|
||||||
171
csharp-api-examples/OnlineDecodeFile.cs
Normal file
171
csharp-api-examples/OnlineDecodeFile.cs
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
// See https://aka.ms/new-console-template for more information
|
||||||
|
// Copyright (c) 2023 by manyeyes
|
||||||
|
using SherpaOnnx;
|
||||||
|
/// Please refer to
|
||||||
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
|
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||||
|
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||||
|
/// from there.
|
||||||
|
|
||||||
|
/// download model eg:
|
||||||
|
/// (The directory where the application runs)
|
||||||
|
/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
|
||||||
|
/// cd /path/to
|
||||||
|
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||||
|
|
||||||
|
/// NuGet for sherpa-onnx
|
||||||
|
/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
|
||||||
|
/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||||
|
|
||||||
|
// transducer Usage:
|
||||||
|
/*
|
||||||
|
.\SherpaOnnx.Examples.exe `
|
||||||
|
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
|
||||||
|
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
|
||||||
|
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
|
||||||
|
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
|
||||||
|
--num-threads=2 `
|
||||||
|
--decoding-method=modified_beam_search `
|
||||||
|
--debug=false `
|
||||||
|
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
|
||||||
|
*/
|
||||||
|
|
||||||
|
internal class OnlineDecodeFile
|
||||||
|
{
|
||||||
|
/// <summary>
/// Decodes wave files one after another with a streaming (online) transducer
/// model, feeding each file chunk by chunk and printing the partial result after
/// every chunk. Finishes by printing timing statistics.
/// </summary>
/// <param name="args">
/// Options of the form <c>--name=value</c> plus positional wave file paths.
/// When empty, the options are read interactively from standard input.
/// </param>
static void Main(string[] args)
{
    string usage = @"
-----------------------------
transducer Usage:
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
--num-threads=2 `
--decoding-method=modified_beam_search `
--debug=false `
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
-----------------------------
";
    if (args.Length == 0)
    {
        Console.WriteLine("Please enter the correct parameters:");
        Console.WriteLine(usage);

        // Read one token per line until a blank line or end of input.
        // ReadLine-only input keeps the first character of every line intact,
        // works when stdin is redirected, and does not depend on the
        // platform's newline sequence.
        List<string> typedArgs = new List<string>();
        while (true)
        {
            string? line = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(line))
            {
                break;
            }
            typedArgs.Add(line);
        }
        args = typedArgs.ToArray();
    }

    Console.WriteLine("Started!\n");
    string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
    List<string> wavFiles = new List<string>();
    Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

    // Missing options resolve to "" so the emptiness checks below can be used uniformly.
    string Option(string key) =>
        argsDict.TryGetValue(key, out string? value) ? value : "";
    string PathOption(string key) =>
        argsDict.TryGetValue(key, out string? value) ? Path.Combine(applicationBase, value) : "";

    string encoder = PathOption("encoder");
    string decoder = PathOption("decoder");
    string joiner = PathOption("joiner");
    string tokens = PathOption("tokens");
    string num_threads = Option("num_threads");
    string decodingMethod = Option("decoding_method");

    // Unparsable or missing values leave the defaults 0 / false.
    int.TryParse(num_threads, out int numThreads);
    bool.TryParse(Option("debug"), out bool isDebug);

    // Only the streaming transducer is decoded by this example, so all three
    // of its model files are required.
    if (string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
    {
        Console.WriteLine("Please specify at least one model");
        Console.WriteLine(usage);
        return;
    }

    if (wavFiles.Count == 0)
    {
        Console.WriteLine("Please specify at least one wave file");
        Console.WriteLine(usage);
        return;
    }

    OnlineTransducer onlineTransducer = new OnlineTransducer();
    onlineTransducer.EncoderFilename = encoder;
    onlineTransducer.DecoderFilename = decoder;
    onlineTransducer.JoinerFilename = joiner;

    OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
        onlineTransducer,
        tokens,
        num_threads: numThreads,
        debug: isDebug,
        decoding_method: decodingMethod);

    TimeSpan total_duration = TimeSpan.Zero;
    // One stopwatch accumulates decoding time across ALL files so that it matches
    // total_duration, which is also summed over all files.
    System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
    foreach (string wavFile in wavFiles)
    {
        // NOTE(review): GetChunkSamplesList presumably reports the clip length through `duration` - confirm in AudioHelper.
        TimeSpan duration = TimeSpan.Zero;
        List<float[]> samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration);
        OnlineStream stream = onlineRecognizer.CreateStream();

        stopwatch.Start();
        foreach (float[] chunk in samplesList)
        {
            onlineRecognizer.AcceptWaveForm(stream, 16000, chunk);
            onlineRecognizer.DecodeStream(stream);
            OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream);
            Console.WriteLine(result_on.text);
        }
        stopwatch.Stop();

        total_duration += duration;
    }

    // Real time factor = time spent decoding / total length of the decoded audio.
    double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
    double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
    Console.WriteLine("num_threads:{0}", num_threads);
    Console.WriteLine("decoding_method:{0}", decodingMethod);
    Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
    Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
    Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

    Console.WriteLine("End!");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Splits raw command-line tokens into named options and positional wave file paths.
/// </summary>
/// <param name="args">
/// Raw tokens such as <c>--num-threads=2 `</c> (an option) or
/// <c>./test_wavs/0.wav</c> (a token without '=' is treated as a wave file).
/// </param>
/// <param name="applicationBase">Directory each positional path is combined with.</param>
/// <param name="wavFiles">Receives one combined path per positional token.</param>
/// <returns>
/// Option values keyed by option name, with surrounding dashes removed and
/// inner '-' replaced by '_' (<c>--num-threads</c> becomes <c>num_threads</c>).
/// When an option is repeated, the last occurrence wins.
/// </returns>
static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
{
    // Stripped around option names and positional paths: the "--" prefix,
    // PowerShell's trailing line-continuation backtick, and padding whitespace.
    char[] nameTrimChars = { '-', '`', ' ', '\t', '\r', '\n' };
    // Values keep their hyphens so that "--num-threads=-1" is not turned into "1".
    char[] valueTrimChars = { '`', ' ', '\t', '\r', '\n' };

    Dictionary<string, string> argsDict = new Dictionary<string, string>();
    foreach (string input in args)
    {
        if (string.IsNullOrEmpty(input))
        {
            continue;
        }

        // Split on the FIRST '=' only, so values that contain '=' stay intact.
        int separator = input.IndexOf('=');
        if (separator < 0)
        {
            string path = input.Trim(nameTrimChars);
            // A token that is only padding or a backtick must not be registered as a file.
            if (path.Length > 0)
            {
                wavFiles.Add(Path.Combine(applicationBase, path));
            }
            continue;
        }

        string name = input.Substring(0, separator).Trim(nameTrimChars).Replace("-", "_");
        string value = input.Substring(separator + 1).Trim(valueTrimChars);
        // Indexer assignment instead of Add: a repeated option overrides, it does not throw.
        argsDict[name] = value;
    }
    return argsDict;
}
|
||||||
|
|
||||||
|
}
|
||||||
221
csharp-api-examples/OnlineDecodeFiles.cs
Normal file
221
csharp-api-examples/OnlineDecodeFiles.cs
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
// See https://aka.ms/new-console-template for more information
|
||||||
|
// Copyright (c) 2023 by manyeyes
|
||||||
|
using SherpaOnnx;
|
||||||
|
/// Please refer to
|
||||||
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
|
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||||
|
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||||
|
/// from there.
|
||||||
|
|
||||||
|
/// download model eg:
|
||||||
|
/// (The directory where the application runs)
|
||||||
|
/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
|
||||||
|
/// cd /path/to
|
||||||
|
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||||
|
|
||||||
|
/// NuGet for sherpa-onnx
|
||||||
|
/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
|
||||||
|
/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||||
|
|
||||||
|
// transducer Usage:
|
||||||
|
/*
|
||||||
|
.\SherpaOnnx.Examples.exe `
|
||||||
|
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
|
||||||
|
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
|
||||||
|
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
|
||||||
|
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
|
||||||
|
--num-threads=2 `
|
||||||
|
--decoding-method=modified_beam_search `
|
||||||
|
--debug=false `
|
||||||
|
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
|
||||||
|
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
|
||||||
|
*/
|
||||||
|
|
||||||
|
internal class OnlineDecodeFiles
|
||||||
|
{
|
||||||
|
/// <summary>
/// Decodes several wave files in parallel with a streaming (online) transducer
/// model: one stream is created per file, the whole file is fed into it, and all
/// streams are decoded together. Prints the recognized text per file followed by
/// timing statistics.
/// </summary>
/// <param name="args">
/// Options of the form <c>--name=value</c> plus positional wave file paths.
/// When empty, the options are read interactively from standard input.
/// </param>
static void Main(string[] args)
{
    string usage = @"
-----------------------------
transducer Usage:
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
--num-threads=2 `
--decoding-method=modified_beam_search `
--debug=false `
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
-----------------------------
";
    if (args.Length == 0)
    {
        Console.WriteLine("Please enter the correct parameters:");
        Console.WriteLine(usage);

        // Read one token per line until a blank line or end of input.
        // ReadLine-only input keeps the first character of every line intact,
        // works when stdin is redirected, and does not depend on the
        // platform's newline sequence.
        List<string> typedArgs = new List<string>();
        while (true)
        {
            string? line = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(line))
            {
                break;
            }
            typedArgs.Add(line);
        }
        args = typedArgs.ToArray();
    }

    Console.WriteLine("Started!\n");
    string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
    List<string> wavFiles = new List<string>();
    Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

    // Missing options resolve to "" so the emptiness checks below can be used uniformly.
    string Option(string key) =>
        argsDict.TryGetValue(key, out string? value) ? value : "";
    string PathOption(string key) =>
        argsDict.TryGetValue(key, out string? value) ? Path.Combine(applicationBase, value) : "";

    string encoder = PathOption("encoder");
    string decoder = PathOption("decoder");
    string joiner = PathOption("joiner");
    string tokens = PathOption("tokens");
    string num_threads = Option("num_threads");
    string decodingMethod = Option("decoding_method");

    // Unparsable or missing values leave the defaults 0 / false.
    int.TryParse(num_threads, out int numThreads);
    bool.TryParse(Option("debug"), out bool isDebug);

    // Only the streaming transducer is decoded by this example, so all three
    // of its model files are required.
    if (string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
    {
        Console.WriteLine("Please specify at least one model");
        Console.WriteLine(usage);
        return;
    }

    if (wavFiles.Count == 0)
    {
        Console.WriteLine("Please specify at least one wave file");
        Console.WriteLine(usage);
        return;
    }

    OnlineTransducer onlineTransducer = new OnlineTransducer();
    onlineTransducer.EncoderFilename = encoder;
    onlineTransducer.DecoderFilename = decoder;
    onlineTransducer.JoinerFilename = joiner;

    OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
        onlineTransducer,
        tokens,
        num_threads: numThreads,
        debug: isDebug,
        decoding_method: decodingMethod);

    // NOTE(review): GetFileSamples presumably reports the clip length through `duration` - confirm in AudioHelper.
    TimeSpan total_duration = TimeSpan.Zero;
    List<float[]> samplesList = new List<float[]>(wavFiles.Count);
    foreach (string wavFile in wavFiles)
    {
        TimeSpan duration = TimeSpan.Zero;
        samplesList.Add(AudioHelper.GetFileSamples(wavFile, ref duration));
        total_duration += duration;
    }

    // batch decode
    // Stopwatch is monotonic; DateTime.Now can jump when the system clock is adjusted.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    List<OnlineStream> streams = new List<OnlineStream>(samplesList.Count);
    foreach (float[] samples in samplesList)
    {
        OnlineStream stream = onlineRecognizer.CreateStream();
        onlineRecognizer.AcceptWaveForm(stream, 16000, samples);
        streams.Add(stream);
        onlineRecognizer.InputFinished(stream);
    }
    onlineRecognizer.DecodeMultipleStreams(streams);
    List<OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams);
    // Stop before printing so console I/O is not counted as decoding time.
    stopwatch.Stop();

    foreach (OnlineRecognizerResultEntity result in results)
    {
        Console.WriteLine(result.text);
    }

    foreach (var item in results.Zip<OnlineRecognizerResultEntity, string>(wavFiles))
    {
        Console.WriteLine("wavFile:{0}", item.Second);
        Console.WriteLine("text:{0}", item.First.text.ToLower());
        Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
    }

    // Real time factor = time spent decoding / total length of the decoded audio.
    double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
    double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
    Console.WriteLine("num_threads:{0}", num_threads);
    Console.WriteLine("decoding_method:{0}", decodingMethod);
    Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
    Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
    Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

    Console.WriteLine("End!");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Alternative batch decoding path: builds a streaming transducer recognizer,
/// lets the recognizer create all streams from the loaded samples in one call
/// (<c>CreateStreams</c>), decodes them together and prints each recognized text.
/// </summary>
/// <param name="encoder">Path of the encoder model file.</param>
/// <param name="decoder">Path of the decoder model file.</param>
/// <param name="joiner">Path of the joiner model file.</param>
/// <param name="tokens">Path of the tokens file.</param>
/// <param name="numThreads">Value forwarded as the recognizer's <c>num_threads</c>.</param>
/// <param name="isDebug">Value forwarded as the recognizer's <c>debug</c> flag.</param>
/// <param name="decodingMethod">Value forwarded as the recognizer's <c>decoding_method</c>.</param>
/// <param name="wavFiles">Wave files to decode.</param>
/// <param name="total_duration">Incremented by the duration reported for every loaded file.</param>
public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List<string> wavFiles, ref TimeSpan total_duration)
{
    OnlineTransducer onlineTransducer = new OnlineTransducer();
    onlineTransducer.EncoderFilename = encoder;
    onlineTransducer.DecoderFilename = decoder;
    onlineTransducer.JoinerFilename = joiner;

    OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
        onlineTransducer,
        tokens,
        num_threads: numThreads,
        debug: isDebug,
        decoding_method: decodingMethod);

    // NOTE(review): GetFileSamples presumably reports the clip length through `duration` - confirm in AudioHelper.
    List<float[]> samplesList = new List<float[]>(wavFiles.Count);
    foreach (string wavFile in wavFiles)
    {
        TimeSpan duration = TimeSpan.Zero;
        samplesList.Add(AudioHelper.GetFileSamples(wavFile, ref duration));
        total_duration += duration;
    }

    // The start/end timestamps the earlier version captured here were never
    // read or returned, so they have been removed.
    List<OnlineStream> streams = onlineRecognizer.CreateStreams(samplesList);
    onlineRecognizer.DecodeMultipleStreams(streams);
    List<OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams);
    foreach (OnlineRecognizerResultEntity result in results)
    {
        Console.WriteLine(result.text);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Splits command-line arguments into <c>key=value</c> options and
/// positional wave-file paths.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="applicationBase">Directory that positional file arguments are combined with.</param>
/// <param name="wavFiles">List that receives one combined path per non-empty positional argument.</param>
/// <returns>
/// Option values keyed by option name. Leading/trailing '-', '`' and ' '
/// are trimmed from names and values, and any remaining '-' in a name is
/// replaced by '_' (so <c>--num-threads=2</c> is stored as <c>num_threads</c>).
/// </returns>
static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
{
    char[] trimChars = { '-', '`', ' ' };
    Dictionary<string, string> argsDict = new Dictionary<string, string>();
    foreach (string input in args)
    {
        // Split on the first '=' only, so a value that itself contains '='
        // (for example a path) is kept intact instead of being truncated.
        string[] ss = input.Split('=', 2);
        if (ss.Length == 1)
        {
            // No '=': a positional argument, treated as a wave file.
            if (!string.IsNullOrEmpty(ss[0]))
            {
                wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(trimChars)));
            }
        }
        else
        {
            string key = ss[0].Trim(trimChars).Replace("-", "_");
            // Indexer instead of Add: a repeated option overrides the earlier
            // value rather than throwing ArgumentException.
            argsDict[key] = ss[1].Trim(trimChars);
        }
    }
    return argsDict;
}
|
||||||
|
}
|
||||||
9
csharp-api-examples/README.md
Normal file
9
csharp-api-examples/README.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# ProjectReference csharp-api
|
||||||
|
`<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />`
|
||||||
|
The location of the 'SherpaOnnx' file is ../sherpa-onnx/csharp-api.
|
||||||
|
This C# API is cross-platform; you can compile it yourself on Windows, macOS, and Linux.
|
||||||
|
|
||||||
|
------------
|
||||||
|
Alternatively, install sherpa-onnx through NuGet.
|
||||||
|
# NuGet for sherpa-onnx
|
||||||
|
PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||||
67
csharp-api-examples/Utils/AudioHelper.cs
Normal file
67
csharp-api-examples/Utils/AudioHelper.cs
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
using NAudio.Wave;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
/// <summary>
/// Audio processing helpers that load audio files as 32-bit float samples
/// through NAudio's <see cref="AudioFileReader"/>.
/// Copyright (c) 2023 by manyeyes
/// </summary>
public class AudioHelper
{
    /// <summary>
    /// Size, in bytes, of one chunk produced by <see cref="GetChunkSamplesList"/>
    /// (i.e. <c>ChunkSizeInBytes / sizeof(float)</c> samples per chunk).
    /// </summary>
    private const int ChunkSizeInBytes = 16000;

    /// <summary>
    /// Reads a whole audio file into a single float array.
    /// </summary>
    /// <param name="wavFilePath">Path of the audio file.</param>
    /// <param name="duration">Set to the reader's total time when the file exists.</param>
    /// <returns>
    /// The samples of the file. When the file does not exist, an assertion
    /// failure is raised and a one-element array is returned.
    /// </returns>
    public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration)
    {
        if (!File.Exists(wavFilePath))
        {
            Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath);
            return new float[1];
        }

        // Dispose the reader so the underlying file handle is released.
        using AudioFileReader reader = new AudioFileReader(wavFilePath);
        byte[] bytes = ReadAllBytes(reader);
        duration = reader.TotalTime;

        float[] samples = new float[bytes.Length / sizeof(float)];
        Buffer.BlockCopy(bytes, 0, samples, 0, samples.Length * sizeof(float));
        return samples;
    }

    /// <summary>
    /// Reads a whole audio file and splits it into fixed-size chunks.
    /// </summary>
    /// <param name="wavFilePath">Path of the audio file.</param>
    /// <param name="duration">
    /// Accumulator to which the reader's total time is added exactly once
    /// per call (not once per chunk).
    /// </param>
    /// <returns>
    /// One array per chunk. Every array has the full chunk length; the unused
    /// tail of the last chunk is left as zeros. When the file does not exist,
    /// an assertion failure is raised and a list holding a single one-element
    /// array is returned.
    /// </returns>
    public static List<float[]> GetChunkSamplesList(string wavFilePath, ref TimeSpan duration)
    {
        List<float[]> chunks = new List<float[]>();
        if (!File.Exists(wavFilePath))
        {
            Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath);
            chunks.Add(new float[1]);
            return chunks;
        }

        using AudioFileReader reader = new AudioFileReader(wavFilePath);
        byte[] bytes = ReadAllBytes(reader);
        duration += reader.TotalTime;

        for (int offset = 0; offset < bytes.Length; offset += ChunkSizeInBytes)
        {
            int byteCount = Math.Min(ChunkSizeInBytes, bytes.Length - offset);
            float[] chunk = new float[ChunkSizeInBytes / sizeof(float)];
            Buffer.BlockCopy(bytes, offset, chunk, 0, byteCount);
            chunks.Add(chunk);
        }
        return chunks;
    }

    /// <summary>
    /// Reads the reader's content into a buffer sized by its reported length,
    /// repeating the read until the buffer is full or no more data is returned.
    /// </summary>
    private static byte[] ReadAllBytes(AudioFileReader reader)
    {
        byte[] buffer = new byte[reader.Length];
        int total = 0;
        while (total < buffer.Length)
        {
            // A single Read call is not guaranteed to fill the request.
            int read = reader.Read(buffer, total, buffer.Length - total);
            if (read <= 0)
            {
                break;
            }
            total += read;
        }
        return buffer;
    }
}
|
||||||
20
csharp-api-examples/sherpa-onnx.csproj
Normal file
20
csharp-api-examples/sherpa-onnx.csproj
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<RootNamespace>sherpa_onnx</RootNamespace>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<StartupObject>OnlineDecodeFiles</StartupObject>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="NAudio" Version="2.1.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
872
sherpa-onnx/csharp-api/SherpaOnnx.cs
Normal file
872
sherpa-onnx/csharp-api/SherpaOnnx.cs
Normal file
@@ -0,0 +1,872 @@
|
|||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Diagnostics;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
|
||||||
|
{
|
||||||
|
/// <summary>
/// Base class of the online (streaming) recognizer wrappers. It holds the
/// native stream, recognizer-result and recognizer handles and releases
/// whichever of them are set when the object is disposed or finalized.
/// Copyright (c) 2023 by manyeyes
/// </summary>
public class OnlineBase : IDisposable
{
    internal SherpaOnnxOnlineStream _onlineStream;
    internal IntPtr _onlineRecognizerResult;
    internal SherpaOnnxOnlineRecognizer _onlineRecognizer;
    internal bool _disposed = false;

    /// <summary>
    /// Releases the native handles and suppresses finalization.
    /// </summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases every native handle that is still set. Safe to call more
    /// than once.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>, <c>false</c> when
    /// called from the finalizer. Only native handles are held, so they are
    /// released on both paths.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_onlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
            _onlineRecognizerResult = IntPtr.Zero;
        }
        if (_onlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
            _onlineStream.impl = IntPtr.Zero;
        }
        if (_onlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
            _onlineRecognizer.impl = IntPtr.Zero;
        }

        _disposed = true;
    }

    /// <summary>
    /// Releases the native handles if <see cref="Dispose()"/> was never called.
    /// </summary>
    ~OnlineBase()
    {
        Dispose(disposing: false);
    }
}
|
||||||
|
/// <summary>
/// Wrapper that owns one native online stream handle.
/// </summary>
public class OnlineStream : OnlineBase
{
    internal OnlineStream(SherpaOnnxOnlineStream onlineStream)
    {
        this._onlineStream = onlineStream;
    }

    /// <summary>
    /// Destroys the native stream, whether called from <c>Dispose()</c> or
    /// from the finalizer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> for an explicit dispose, <c>false</c> for finalization.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_onlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
            _onlineStream.impl = IntPtr.Zero;
        }

        // Forward the flag to the base implementation instead of re-entering
        // the public Dispose() entry point.
        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||||
|
/// <summary>
/// Wrapper that owns one native online recognizer result pointer.
/// </summary>
public class OnlineRecognizerResult : OnlineBase
{
    internal OnlineRecognizerResult(IntPtr onlineRecognizerResult)
    {
        this._onlineRecognizerResult = onlineRecognizerResult;
    }

    /// <summary>
    /// Destroys the native result, whether called from <c>Dispose()</c> or
    /// from the finalizer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> for an explicit dispose, <c>false</c> for finalization.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_onlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
            _onlineRecognizerResult = IntPtr.Zero;
        }

        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||||
|
/// <summary>
/// Managed wrapper around a native online (streaming) recognizer: creates
/// streams, feeds them samples, runs decoding and fetches results.
/// </summary>
/// <typeparam name="T">
/// Model description type. The constructor only reads model paths from an
/// <c>OnlineTransducer</c> (or a type derived from it).
/// </typeparam>
public class OnlineRecognizer<T> : OnlineBase
    where T : class, new()
{
    /// <summary>
    /// Builds the native recognizer configuration and creates the recognizer.
    /// </summary>
    /// <param name="t">Model description holding the model file paths.</param>
    /// <param name="tokensFilePath">Path of the tokens file.</param>
    /// <param name="decoding_method">Decoding method name passed to the native configuration.</param>
    /// <param name="sample_rate">Sample rate stored in the feature configuration.</param>
    /// <param name="feature_dim">Feature dimension stored in the feature configuration.</param>
    /// <param name="num_threads">Thread count stored in the model configuration; asserted to be positive.</param>
    /// <param name="debug">Debug flag stored in the model configuration.</param>
    /// <param name="max_active_paths">Passed through to the native configuration.</param>
    /// <param name="enable_endpoint">Passed through to the native configuration.</param>
    /// <param name="rule1_min_trailing_silence">Passed through to the native configuration.</param>
    /// <param name="rule2_min_trailing_silence">Passed through to the native configuration.</param>
    /// <param name="rule3_min_utterance_length">Passed through to the native configuration.</param>
    public OnlineRecognizer(T t,
        string tokensFilePath, string decoding_method = "greedy_search",
        int sample_rate = 16000, int feature_dim = 80,
        int num_threads = 2, bool debug = false, int max_active_paths = 4,
        int enable_endpoint = 0, int rule1_min_trailing_silence = 0,
        int rule2_min_trailing_silence = 0, int rule3_min_utterance_length = 0)
    {
        SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer();
        SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig();

        if (t is OnlineTransducer onlineTransducer)
        {
            Trace.Assert(File.Exists(onlineTransducer.DecoderFilename)
                && File.Exists(onlineTransducer.EncoderFilename)
                && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model");
            Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
            Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
            transducer.encoder_filename = onlineTransducer.EncoderFilename;
            transducer.decoder_filename = onlineTransducer.DecoderFilename;
            transducer.joiner_filename = onlineTransducer.JoinerFilename;
        }

        model_config.transducer = transducer;
        model_config.num_threads = num_threads;
        model_config.debug = debug;
        model_config.tokens = tokensFilePath;

        SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
        feat_config.sample_rate = sample_rate;
        feat_config.feature_dim = feature_dim;

        SherpaOnnxOnlineRecognizerConfig sherpaOnnxOnlineRecognizerConfig;
        sherpaOnnxOnlineRecognizerConfig.decoding_method = decoding_method;
        sherpaOnnxOnlineRecognizerConfig.feat_config = feat_config;
        sherpaOnnxOnlineRecognizerConfig.model_config = model_config;
        sherpaOnnxOnlineRecognizerConfig.max_active_paths = max_active_paths;
        // endpoint
        sherpaOnnxOnlineRecognizerConfig.enable_endpoint = enable_endpoint;
        sherpaOnnxOnlineRecognizerConfig.rule1_min_trailing_silence = rule1_min_trailing_silence;
        sherpaOnnxOnlineRecognizerConfig.rule2_min_trailing_silence = rule2_min_trailing_silence;
        sherpaOnnxOnlineRecognizerConfig.rule3_min_utterance_length = rule3_min_utterance_length;

        _onlineRecognizer =
            SherpaOnnxSharp.CreateOnlineRecognizer(sherpaOnnxOnlineRecognizerConfig);
    }

    /// <summary>Creates a native stream for this recognizer and wraps it.</summary>
    internal OnlineStream CreateOnlineStream()
    {
        SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer);
        return new OnlineStream(stream);
    }

    /// <summary>Signals the native stream that no more samples will be supplied.</summary>
    public void InputFinished(OnlineStream stream)
    {
        SherpaOnnxSharp.InputFinished(stream._onlineStream);
    }

    /// <summary>
    /// Creates one stream per sample array, feeds it the samples and marks
    /// its input as finished.
    /// </summary>
    /// <param name="samplesList">One sample array per stream to create.</param>
    /// <param name="sample_rate">
    /// Sample rate of the supplied samples. Defaults to 16000, the value that
    /// was previously hard-coded.
    /// </param>
    /// <returns>The created streams, in the order of <paramref name="samplesList"/>.</returns>
    public List<OnlineStream> CreateStreams(List<float[]> samplesList, int sample_rate = 16000)
    {
        List<OnlineStream> streams = new List<OnlineStream>(samplesList.Count);
        foreach (float[] samples in samplesList)
        {
            OnlineStream stream = CreateOnlineStream();
            AcceptWaveform(stream._onlineStream, sample_rate, samples);
            InputFinished(stream);
            streams.Add(stream);
        }
        return streams;
    }

    /// <summary>Creates a single empty stream for this recognizer.</summary>
    public OnlineStream CreateStream()
    {
        return CreateOnlineStream();
    }

    /// <summary>Passes the samples and their count to the native stream.</summary>
    internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples)
    {
        SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length);
    }

    /// <summary>Feeds samples to the given stream.</summary>
    public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples)
    {
        AcceptWaveform(stream._onlineStream, sample_rate, samples);
    }

    /// <summary>
    /// Copies the native stream structs of <paramref name="streams"/> into a
    /// contiguous unmanaged buffer.
    /// </summary>
    /// <returns>
    /// The buffer; the caller must release it with
    /// <see cref="Marshal.FreeHGlobal(IntPtr)"/>.
    /// </returns>
    internal IntPtr GetStreamsIntPtr(OnlineStream[] streams)
    {
        int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream));
        IntPtr buffer = Marshal.AllocHGlobal(size * streams.Length);
        try
        {
            for (int i = 0; i < streams.Length; i++)
            {
                Marshal.StructureToPtr(streams[i]._onlineStream, IntPtr.Add(buffer, i * size), false);
            }
        }
        catch
        {
            // Do not leak the buffer when marshalling fails part-way.
            Marshal.FreeHGlobal(buffer);
            throw;
        }
        return buffer;
    }

    /// <summary>Returns whether the native side reports the stream as ready for decoding.</summary>
    internal bool IsReady(OnlineStream stream)
    {
        return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0;
    }

    /// <summary>
    /// Repeatedly decodes, as one batch, all streams that are currently ready,
    /// until none of them is ready any more.
    /// </summary>
    public void DecodeMultipleStreams(List<OnlineStream> streams)
    {
        while (true)
        {
            OnlineStream[] ready = streams.FindAll(IsReady).ToArray();
            if (ready.Length == 0)
            {
                break;
            }

            IntPtr streamsIntPtr = GetStreamsIntPtr(ready);
            try
            {
                SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, ready.Length);
            }
            finally
            {
                // Free the unmanaged array even if the native call throws.
                Marshal.FreeHGlobal(streamsIntPtr);
            }
        }
    }

    /// <summary>Decodes a single stream for as long as it is ready.</summary>
    public void DecodeStream(OnlineStream stream)
    {
        while (IsReady(stream))
        {
            SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream);
        }
    }

    /// <summary>
    /// Fetches the current result of a native stream, copies it into a managed
    /// entity and destroys the native result before returning.
    /// </summary>
    internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream)
    {
        OnlineRecognizerResultEntity entity = new OnlineRecognizerResultEntity();
        IntPtr resultPtr = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream);
        if (resultPtr == IntPtr.Zero)
        {
            entity.text = string.Empty;
            return entity;
        }

        try
        {
            SherpaOnnxOnlineRecognizerResult result =
                Marshal.PtrToStructure<SherpaOnnxOnlineRecognizerResult>(resultPtr);
            // NOTE(review): the text is decoded with the platform ANSI encoding,
            // as before - confirm that this matches the native library's encoding.
            entity.text = Marshal.PtrToStringAnsi(result.text) ?? string.Empty;
            entity.text_len = result.text_len;
        }
        finally
        {
            // Release deterministically: the text pointer was only read above,
            // and nothing is left for a finalizer to free at an arbitrary time.
            SherpaOnnxSharp.DestroyOnlineRecognizerResult(resultPtr);
        }
        return entity;
    }

    /// <summary>Returns the current result of the given stream.</summary>
    public OnlineRecognizerResultEntity GetResult(OnlineStream stream)
    {
        return GetResult(stream._onlineStream);
    }

    /// <summary>Returns the current result of every stream, in input order.</summary>
    public List<OnlineRecognizerResultEntity> GetResults(List<OnlineStream> streams)
    {
        List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>(streams.Count);
        foreach (OnlineStream stream in streams)
        {
            results.Add(GetResult(stream._onlineStream));
        }
        return results;
    }

    /// <summary>
    /// Destroys the native recognizer, whether called from <c>Dispose()</c> or
    /// from the finalizer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> for an explicit dispose, <c>false</c> for finalization.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_onlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
            _onlineRecognizer.impl = IntPtr.Zero;
        }

        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||||
|
/// <summary>
/// Base class of the offline (non-streaming) recognizer wrappers. It holds
/// the native stream, recognizer-result and recognizer handles and releases
/// whichever of them are set when the object is disposed or finalized.
/// </summary>
public class OfflineBase : IDisposable
{
    internal SherpaOnnxOfflineStream _offlineStream;
    internal IntPtr _offlineRecognizerResult;
    internal SherpaOnnxOfflineRecognizer _offlineRecognizer;
    internal bool _disposed = false;

    /// <summary>
    /// Releases the native handles and suppresses finalization.
    /// </summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases every native handle that is still set. Safe to call more
    /// than once.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>, <c>false</c> when
    /// called from the finalizer. Only native handles are held, so they are
    /// released on both paths.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }
        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }
        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }

        _disposed = true;
    }

    /// <summary>
    /// Releases the native handles if <see cref="Dispose()"/> was never called.
    /// </summary>
    ~OfflineBase()
    {
        Dispose(disposing: false);
    }
}
|
||||||
|
/// <summary>
/// Wrapper that owns one native offline stream handle.
/// </summary>
public class OfflineStream : OfflineBase
{
    internal OfflineStream(SherpaOnnxOfflineStream offlineStream)
    {
        this._offlineStream = offlineStream;
    }

    /// <summary>
    /// Destroys the native stream, whether called from <c>Dispose()</c> or
    /// from the finalizer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> for an explicit dispose, <c>false</c> for finalization.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }

        // Forward the flag to the base implementation instead of re-entering
        // the public Dispose() entry point.
        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||||
|
/// <summary>
/// Wrapper that owns one native offline recognizer result pointer.
/// </summary>
public class OfflineRecognizerResult : OfflineBase
{
    internal OfflineRecognizerResult(IntPtr offlineRecognizerResult)
    {
        this._offlineRecognizerResult = offlineRecognizerResult;
    }

    /// <summary>
    /// Destroys the native result, whether called from <c>Dispose()</c> or
    /// from the finalizer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> for an explicit dispose, <c>false</c> for finalization.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }

        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||||
|
/// <summary>
/// Managed wrapper around a native offline (non-streaming) recognizer:
/// creates streams, feeds them samples, runs batch decoding and fetches
/// results.
/// </summary>
/// <typeparam name="T">
/// Model description type. The constructor reads model paths from an
/// <c>OfflineTransducer</c>, <c>OfflineParaformer</c> or
/// <c>OfflineNemoEncDecCtc</c> (or a type derived from one of them).
/// </typeparam>
public class OfflineRecognizer<T> : OfflineBase
    where T : class, new()
{
    /// <summary>
    /// Builds the native recognizer configuration and creates the recognizer.
    /// </summary>
    /// <param name="t">Model description holding the model file path(s).</param>
    /// <param name="tokensFilePath">Path of the tokens file.</param>
    /// <param name="decoding_method">Decoding method name passed to the native configuration.</param>
    /// <param name="sample_rate">Sample rate stored in the feature configuration.</param>
    /// <param name="feature_dim">Feature dimension stored in the feature configuration.</param>
    /// <param name="num_threads">Thread count stored in the model configuration; asserted to be positive.</param>
    /// <param name="debug">Debug flag stored in the model configuration.</param>
    public OfflineRecognizer(T t,
        string tokensFilePath, string decoding_method = "greedy_search",
        int sample_rate = 16000, int feature_dim = 80,
        int num_threads = 2, bool debug = false)
    {
        SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer();
        SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer();
        SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc();
        SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig();

        if (t is OfflineTransducer offlineTransducer)
        {
            Trace.Assert(File.Exists(offlineTransducer.DecoderFilename)
                && File.Exists(offlineTransducer.EncoderFilename)
                && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model");
            AssertCommonArguments(tokensFilePath, num_threads);
            transducer.encoder_filename = offlineTransducer.EncoderFilename;
            transducer.decoder_filename = offlineTransducer.DecoderFilename;
            transducer.joiner_filename = offlineTransducer.JoinerFilename;
        }
        else if (t is OfflineParaformer offlineParaformer)
        {
            Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model");
            AssertCommonArguments(tokensFilePath, num_threads);
            paraformer.model = offlineParaformer.Model;
        }
        else if (t is OfflineNemoEncDecCtc offlineNemoEncDecCtc)
        {
            Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model");
            AssertCommonArguments(tokensFilePath, num_threads);
            nemo_ctc.model = offlineNemoEncDecCtc.Model;
        }

        model_config.transducer = transducer;
        model_config.paraformer = paraformer;
        model_config.nemo_ctc = nemo_ctc;
        model_config.num_threads = num_threads;
        model_config.debug = debug;
        model_config.tokens = tokensFilePath;

        SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
        feat_config.sample_rate = sample_rate;
        feat_config.feature_dim = feature_dim;

        SherpaOnnxOfflineRecognizerConfig sherpaOnnxOfflineRecognizerConfig;
        sherpaOnnxOfflineRecognizerConfig.decoding_method = decoding_method;
        sherpaOnnxOfflineRecognizerConfig.feat_config = feat_config;
        sherpaOnnxOfflineRecognizerConfig.model_config = model_config;

        _offlineRecognizer =
            SherpaOnnxSharp.CreateOfflineRecognizer(sherpaOnnxOfflineRecognizerConfig);
    }

    /// <summary>Checks shared by every recognized model type.</summary>
    private static void AssertCommonArguments(string tokensFilePath, int num_threads)
    {
        Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
        Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
    }

    /// <summary>Creates a native stream for this recognizer and wraps it.</summary>
    internal OfflineStream CreateOfflineStream()
    {
        SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer);
        return new OfflineStream(stream);
    }

    /// <summary>
    /// Creates one stream per sample array and feeds it the samples.
    /// </summary>
    /// <param name="samplesList">One sample array per stream to create.</param>
    /// <param name="sample_rate">
    /// Sample rate of the supplied samples. Defaults to 16000, the value that
    /// was previously hard-coded.
    /// </param>
    /// <returns>The created streams, in the order of <paramref name="samplesList"/>.</returns>
    public OfflineStream[] CreateOfflineStream(List<float[]> samplesList, int sample_rate = 16000)
    {
        OfflineStream[] streams = new OfflineStream[samplesList.Count];
        for (int i = 0; i < streams.Length; i++)
        {
            OfflineStream stream = CreateOfflineStream();
            AcceptWaveform(stream._offlineStream, sample_rate, samplesList[i]);
            streams[i] = stream;
        }
        return streams;
    }

    /// <summary>Passes the samples and their count to the native stream.</summary>
    internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples)
    {
        SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length);
    }

    /// <summary>
    /// Copies the native stream structs of <paramref name="streams"/> into a
    /// contiguous unmanaged buffer.
    /// </summary>
    /// <returns>
    /// The buffer; the caller must release it with
    /// <see cref="Marshal.FreeHGlobal(IntPtr)"/>.
    /// </returns>
    internal IntPtr GetStreamsIntPtr(OfflineStream[] streams)
    {
        int size = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream));
        IntPtr buffer = Marshal.AllocHGlobal(size * streams.Length);
        try
        {
            for (int i = 0; i < streams.Length; i++)
            {
                Marshal.StructureToPtr(streams[i]._offlineStream, IntPtr.Add(buffer, i * size), false);
            }
        }
        catch
        {
            // Do not leak the buffer when marshalling fails part-way.
            Marshal.FreeHGlobal(buffer);
            throw;
        }
        return buffer;
    }

    /// <summary>Decodes all given streams in one native batch call.</summary>
    public void DecodeMultipleOfflineStreams(OfflineStream[] streams)
    {
        IntPtr streamsIntPtr = GetStreamsIntPtr(streams);
        try
        {
            SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length);
        }
        finally
        {
            // Free the unmanaged array even if the native call throws.
            Marshal.FreeHGlobal(streamsIntPtr);
        }
    }

    /// <summary>
    /// Fetches the result of a native stream, copies it into a managed entity
    /// and destroys the native result before returning.
    /// </summary>
    internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream)
    {
        OfflineRecognizerResultEntity entity = new OfflineRecognizerResultEntity();
        IntPtr resultPtr = SherpaOnnxSharp.GetOfflineStreamResult(stream);
        if (resultPtr == IntPtr.Zero)
        {
            entity.text = string.Empty;
            return entity;
        }

        try
        {
            SherpaOnnxOfflineRecognizerResult result =
                Marshal.PtrToStructure<SherpaOnnxOfflineRecognizerResult>(resultPtr);
            // NOTE(review): the text is decoded with the platform ANSI encoding,
            // as before - confirm that this matches the native library's encoding.
            entity.text = Marshal.PtrToStringAnsi(result.text) ?? string.Empty;
            entity.text_len = result.text_len;
        }
        finally
        {
            // Release deterministically: the text pointer was only read above,
            // and nothing is left for a finalizer to free at an arbitrary time.
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(resultPtr);
        }
        return entity;
    }

    /// <summary>Returns the result of every stream, in input order.</summary>
    public List<OfflineRecognizerResultEntity> GetResults(OfflineStream[] streams)
    {
        List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>(streams.Length);
        foreach (OfflineStream stream in streams)
        {
            results.Add(GetResult(stream._offlineStream));
        }
        return results;
    }

    /// <summary>
    /// Destroys the native recognizer, whether called from <c>Dispose()</c> or
    /// from the finalizer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> for an explicit dispose, <c>false</c> for finalization.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }

        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||||
|
internal static partial class SherpaOnnxSharp
|
||||||
|
{
|
||||||
|
private const string dllName = @"SherpaOnnxSharp";
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "CreateOfflineRecognizer", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "CreateOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "AcceptWaveform", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "DecodeOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "DecodeMultipleOfflineStreams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr
|
||||||
|
streams, int n);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "GetOfflineStreamResult", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "DestroyOfflineRecognizerResult", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DestroyOfflineRecognizerResult(IntPtr result);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "DestroyOfflineStream", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "DestroyOfflineRecognizer", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer);
|
||||||
|
|
||||||
|
[DllImport(dllName, EntryPoint = "CreateOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config);
|
||||||
|
|
||||||
|
/// Free a pointer returned by CreateOnlineRecognizer()
|
||||||
|
///
|
||||||
|
/// @param p A pointer returned by CreateOnlineRecognizer()
|
||||||
|
[DllImport(dllName, EntryPoint = "DestroyOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer);
|
||||||
|
|
||||||
|
/// Create an online stream for accepting wave samples.
|
||||||
|
///
|
||||||
|
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||||
|
/// @return Return a pointer to an OnlineStream. The user has to invoke
|
||||||
|
/// DestroyOnlineStream() to free it to avoid memory leak.
|
||||||
|
[DllImport(dllName, EntryPoint = "CreateOnlineStream", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern SherpaOnnxOnlineStream CreateOnlineStream(
|
||||||
|
SherpaOnnxOnlineRecognizer recognizer);
|
||||||
|
|
||||||
|
/// Destroy an online stream.
|
||||||
|
///
|
||||||
|
/// @param stream A pointer returned by CreateOnlineStream()
|
||||||
|
[DllImport(dllName, EntryPoint = "DestroyOnlineStream", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream);
|
||||||
|
|
||||||
|
/// Accept input audio samples and compute the features.
|
||||||
|
/// The user has to invoke DecodeOnlineStream() to run the neural network and
|
||||||
|
/// decoding.
|
||||||
|
///
|
||||||
|
/// @param stream A pointer returned by CreateOnlineStream().
|
||||||
|
/// @param sample_rate Sample rate of the input samples. If it is different
|
||||||
|
/// from config.feat_config.sample_rate, we will do
|
||||||
|
/// resampling inside sherpa-onnx.
|
||||||
|
/// @param samples A pointer to a 1-D array containing audio samples.
|
||||||
|
/// The range of samples has to be normalized to [-1, 1].
|
||||||
|
/// @param n Number of elements in the samples array.
|
||||||
|
[DllImport(dllName, EntryPoint = "AcceptOnlineWaveform", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate,
|
||||||
|
float[] samples, int n);
|
||||||
|
|
||||||
|
/// Return 1 if there are enough number of feature frames for decoding.
|
||||||
|
/// Return 0 otherwise.
|
||||||
|
///
|
||||||
|
/// @param recognizer A pointer returned by CreateOnlineRecognizer
|
||||||
|
/// @param stream A pointer returned by CreateOnlineStream
|
||||||
|
[DllImport(dllName, EntryPoint = "IsOnlineStreamReady", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer,
|
||||||
|
SherpaOnnxOnlineStream stream);
|
||||||
|
|
||||||
|
/// <summary>
/// Runs the neural network model and the decoder on a stream.
/// </summary>
/// <remarks>
/// Precondition: IsOnlineStreamReady() MUST return 1 for the stream.
/// Typical usage:
/// <code>
/// while (IsOnlineStreamReady(recognizer, stream) != 0)
/// {
///     DecodeOnlineStream(recognizer, stream);
/// }
/// </code>
/// </remarks>
/// <param name="recognizer">The recognizer that owns the model.</param>
/// <param name="stream">The stream to decode.</param>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DecodeOnlineStream")]
internal static extern void DecodeOnlineStream(
    SherpaOnnxOnlineRecognizer recognizer,
    SherpaOnnxOnlineStream stream);
|
||||||
|
|
||||||
|
/// <summary>
/// Batch variant of DecodeOnlineStream(): decodes several online streams in
/// parallel.
/// </summary>
/// <remarks>
/// Caution: the caller has to ensure that every stream is ready, i.e.
/// IsOnlineStreamReady() returns 1 for each of them.
/// </remarks>
/// <param name="recognizer">A handle returned by CreateOnlineRecognizer().</param>
/// <param name="streams">
/// Pointer to an array of stream pointers, each one returned by
/// CreateOnlineStream().
/// </param>
/// <param name="n">Number of elements in the <paramref name="streams"/> array.</param>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DecodeMultipleOnlineStreams")]
internal static extern void DecodeMultipleOnlineStreams(
    SherpaOnnxOnlineRecognizer recognizer,
    IntPtr streams,
    int n);
|
||||||
|
|
||||||
|
/// <summary>
/// Fetches the decoding results produced so far for an online stream.
/// </summary>
/// <param name="recognizer">A handle returned by CreateOnlineRecognizer().</param>
/// <param name="stream">A handle returned by CreateOnlineStream().</param>
/// <returns>
/// A pointer to the result. The caller has to pass it to
/// DestroyOnlineRecognizerResult() to avoid a memory leak.
/// </returns>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "GetOnlineStreamResult")]
internal static extern IntPtr GetOnlineStreamResult(
    SherpaOnnxOnlineRecognizer recognizer,
    SherpaOnnxOnlineStream stream);
|
||||||
|
|
||||||
|
/// <summary>
/// Frees a result obtained from GetOnlineStreamResult().
/// </summary>
/// <param name="result">A pointer returned by GetOnlineStreamResult().</param>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOnlineRecognizerResult")]
internal static extern void DestroyOnlineRecognizerResult(
    IntPtr result);
|
||||||
|
|
||||||
|
/// <summary>
/// Resets an online stream: clears the neural network model state and the
/// decoding state.
/// </summary>
/// <param name="recognizer">A handle returned by CreateOnlineRecognizer().</param>
/// <param name="stream">A handle returned by CreateOnlineStream().</param>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "Reset")]
internal static extern void Reset(
    SherpaOnnxOnlineRecognizer recognizer,
    SherpaOnnxOnlineStream stream);
|
||||||
|
|
||||||
|
/// <summary>
/// Signals that no more audio samples will be provided for the stream.
/// After this call, AcceptOnlineWaveform() must not be called on it any more.
/// </summary>
/// <param name="stream">A handle returned by CreateOnlineStream().</param>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "InputFinished")]
internal static extern void InputFinished(
    SherpaOnnxOnlineStream stream);
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether an endpoint has been detected on the stream.
/// </summary>
/// <param name="recognizer">A handle returned by CreateOnlineRecognizer().</param>
/// <param name="stream">A handle returned by CreateOnlineStream().</param>
/// <returns>1 if an endpoint is detected, 0 otherwise.</returns>
[DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "IsEndpoint")]
internal static extern int IsEndpoint(
    SherpaOnnxOnlineRecognizer recognizer,
    SherpaOnnxOnlineStream stream);
|
||||||
|
}
|
||||||
|
/// <summary>
/// Interop mirror of the native SherpaOnnxOfflineTransducer struct: the three
/// model files of an offline transducer. Field order must match the native
/// declaration.
/// </summary>
internal struct SherpaOnnxOfflineTransducer
{
    /// <summary>Path of the encoder model file.</summary>
    public string encoder_filename;

    /// <summary>Path of the decoder model file.</summary>
    public string decoder_filename;

    /// <summary>Path of the joiner model file.</summary>
    public string joiner_filename;

    /// <summary>Initializes every path to the empty string.</summary>
    public SherpaOnnxOfflineTransducer()
    {
        encoder_filename = decoder_filename = joiner_filename = "";
    }
}
|
||||||
|
/// <summary>
/// Interop mirror of the native SherpaOnnxOfflineParaformer struct.
/// </summary>
internal struct SherpaOnnxOfflineParaformer
{
    /// <summary>Path of the paraformer model file.</summary>
    public string model;

    /// <summary>Initializes the model path to the empty string.</summary>
    public SherpaOnnxOfflineParaformer() => model = "";
}
|
||||||
|
/// <summary>
/// Interop mirror of the native SherpaOnnxOfflineNemoEncDecCtc struct.
/// </summary>
internal struct SherpaOnnxOfflineNemoEncDecCtc
{
    /// <summary>Path of the NeMo CTC model file.</summary>
    public string model;

    /// <summary>Initializes the model path to the empty string.</summary>
    public SherpaOnnxOfflineNemoEncDecCtc() => model = "";
}
|
||||||
|
/// <summary>
/// Interop mirror of the native SherpaOnnxOfflineModelConfig struct. Field
/// order and sizes must match the native declaration.
/// </summary>
internal struct SherpaOnnxOfflineModelConfig
{
    /// <summary>Transducer model files.</summary>
    public SherpaOnnxOfflineTransducer transducer;

    /// <summary>Paraformer model file.</summary>
    public SherpaOnnxOfflineParaformer paraformer;

    /// <summary>NeMo CTC model file.</summary>
    public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;

    /// <summary>Path of the tokens file.</summary>
    public string tokens;

    /// <summary>Value forwarded to the native model config's num_threads.</summary>
    public int num_threads;

    /// <summary>Value forwarded to the native model config's debug flag.</summary>
    // The native field is a 1-byte C++ `bool`. Without this attribute the
    // marshaller emits a 4-byte Win32 BOOL, which only happens to work because
    // of little-endian byte order and trailing struct padding.
    [MarshalAs(UnmanagedType.I1)]
    public bool debug;
}
|
||||||
|
/// <summary>
/// Feature extraction settings shared by the recognizer configs.
/// It expects 16 kHz 16-bit single channel wave format.
/// </summary>
internal struct SherpaOnnxFeatureConfig
{
    /// <summary>
    /// Sample rate of the input data. MUST match the one expected by the
    /// model, e.g. 16000 for the models provided by the project.
    /// </summary>
    public int sample_rate;

    /// <summary>
    /// Feature dimension of the model, e.g. 80 for the models provided by the
    /// project.
    /// </summary>
    public int feature_dim;
}
|
||||||
|
/// <summary>
/// Interop mirror of the native SherpaOnnxOfflineRecognizerConfig struct.
/// </summary>
internal struct SherpaOnnxOfflineRecognizerConfig
{
    /// <summary>Feature extraction settings.</summary>
    public SherpaOnnxFeatureConfig feat_config;

    /// <summary>Model files and runtime options.</summary>
    public SherpaOnnxOfflineModelConfig model_config;

    /// <summary>
    /// Decoding method. Possible values are: greedy_search,
    /// modified_beam_search.
    /// </summary>
    public string decoding_method;
}
|
||||||
|
/// <summary>
/// Opaque handle wrapping a pointer to a native offline recognizer.
/// </summary>
internal struct SherpaOnnxOfflineRecognizer
{
    /// <summary>Raw native pointer.</summary>
    public IntPtr impl;
}
|
||||||
|
/// <summary>
/// Opaque handle wrapping a pointer to a native offline stream.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineStream
{
    /// <summary>Raw native pointer.</summary>
    public IntPtr impl;
}
|
||||||
|
/// <summary>
/// Interop mirror of the native SherpaOnnxOfflineRecognizerResult struct.
/// </summary>
internal struct SherpaOnnxOfflineRecognizerResult
{
    /// <summary>Pointer to the native character buffer holding the text.</summary>
    public IntPtr text;

    /// <summary>Length of the text, as reported by the native side.</summary>
    public int text_len;
}
|
||||||
|
/// <summary>
/// Interop struct holding the three model files of an online transducer.
/// </summary>
internal struct SherpaOnnxOnlineTransducer
{
    /// <summary>Path of the encoder model file.</summary>
    public string encoder_filename;

    /// <summary>Path of the decoder model file.</summary>
    public string decoder_filename;

    /// <summary>Path of the joiner model file.</summary>
    public string joiner_filename;

    /// <summary>Initializes every path to <see cref="string.Empty"/>.</summary>
    public SherpaOnnxOnlineTransducer()
    {
        encoder_filename = decoder_filename = joiner_filename = string.Empty;
    }
}
|
||||||
|
/// <summary>
/// Interop struct describing the model used by an online recognizer.
/// </summary>
internal struct SherpaOnnxOnlineModelConfig
{
    /// <summary>Transducer model files.</summary>
    public SherpaOnnxOnlineTransducer transducer;

    /// <summary>Path of the tokens file.</summary>
    public string tokens;

    /// <summary>Number of threads setting passed to the native library.</summary>
    public int num_threads;

    /// <summary>True to print debug information of the model.</summary>
    public bool debug;
}
|
||||||
|
/// <summary>
/// Interop struct carrying the complete configuration of an online
/// recognizer, including the endpoint detection rules.
/// </summary>
internal struct SherpaOnnxOnlineRecognizerConfig
{
    /// <summary>Feature extraction settings.</summary>
    public SherpaOnnxFeatureConfig feat_config;

    /// <summary>Model files and runtime options.</summary>
    public SherpaOnnxOnlineModelConfig model_config;

    /// <summary>
    /// Decoding method. Possible values are: greedy_search,
    /// modified_beam_search.
    /// </summary>
    public string decoding_method;

    /// <summary>
    /// Used only when decoding_method is modified_beam_search.
    /// Example value: 4.
    /// </summary>
    public int max_active_paths;

    /// <summary>
    /// 0 disables endpoint detection; any non-zero value enables it.
    /// </summary>
    public int enable_endpoint;

    /// <summary>
    /// Rule 1: an endpoint is detected if the trailing silence in seconds is
    /// larger than this value, even if nothing has been decoded.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule1_min_trailing_silence;

    /// <summary>
    /// Rule 2: an endpoint is detected if the trailing silence in seconds is
    /// larger than this value after something that is not blank has been
    /// decoded. Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule2_min_trailing_silence;

    /// <summary>
    /// Rule 3: an endpoint is detected if the utterance in seconds is longer
    /// than this value. Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule3_min_utterance_length;
}
|
||||||
|
/// <summary>
/// Interop struct describing the result returned for an online stream.
/// </summary>
internal struct SherpaOnnxOnlineRecognizerResult
{
    /// <summary>Pointer to the native character buffer holding the text.</summary>
    public IntPtr text;

    /// <summary>Length of the text, as reported by the native side.</summary>
    public int text_len;

    // TODO: Add more fields
}
|
||||||
|
/// <summary>
/// Opaque handle wrapping a pointer to a native online recognizer.
/// </summary>
internal struct SherpaOnnxOnlineRecognizer
{
    /// <summary>Raw native pointer.</summary>
    public IntPtr impl;
}
|
||||||
|
/// <summary>
/// Opaque handle wrapping a pointer to a native online stream.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOnlineStream
{
    /// <summary>Raw native pointer.</summary>
    public IntPtr impl;
}
|
||||||
|
/// <summary>
/// Managed-side settings for an offline NeMo CTC model.
/// </summary>
public class OfflineNemoEncDecCtc
{
    /// <summary>Path of the model file. Defaults to the empty string.</summary>
    public string Model { get; set; } = string.Empty;
}
|
||||||
|
/// <summary>
/// Managed-side settings for an offline paraformer model.
/// </summary>
public class OfflineParaformer
{
    /// <summary>Path of the model file. Defaults to the empty string.</summary>
    public string Model { get; set; } = string.Empty;
}
|
||||||
|
/// <summary>
/// Managed representation of the result of an offline recognition.
/// </summary>
public class OfflineRecognizerResultEntity
{
    /// <summary>The recognized text.</summary>
    public string? text { get; set; }

    /// <summary>Length of the recognized text.</summary>
    public int text_len { get; set; }

    /// <summary>The decoded tokens.</summary>
    public List<string>? tokens { get; set; }

    /// <summary>The timestamps.</summary>
    public List<float>? timestamps { get; set; }
}
|
||||||
|
/// <summary>
/// Managed-side settings naming the model files of an offline transducer.
/// Every path defaults to the empty string.
/// </summary>
public class OfflineTransducer
{
    /// <summary>Path of the encoder model file.</summary>
    public string EncoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the decoder model file.</summary>
    public string DecoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the joiner model file.</summary>
    public string JoinerFilename { get; set; } = string.Empty;
}
|
||||||
|
/// <summary>
/// Managed-side endpoint detection settings for an online recognizer.
/// </summary>
public class OnlineEndpoint
{
    /// <summary>
    /// 0 disables endpoint detection; any non-zero value enables it.
    /// </summary>
    public int EnableEndpoint { get; set; }

    /// <summary>
    /// An endpoint is detected if the trailing silence in seconds is larger
    /// than this value, even if nothing has been decoded.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule1MinTrailingSilence { get; set; }

    /// <summary>
    /// An endpoint is detected if the trailing silence in seconds is larger
    /// than this value after something that is not blank has been decoded.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule2MinTrailingSilence { get; set; }

    /// <summary>
    /// An endpoint is detected if the utterance in seconds is longer than
    /// this value. Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule3MinUtteranceLength { get; set; }
}
|
||||||
|
/// <summary>
/// Managed representation of the result of an online recognition.
/// </summary>
public class OnlineRecognizerResultEntity
{
    /// <summary>The recognized text.</summary>
    public string? text { get; set; }

    /// <summary>Length of the recognized text.</summary>
    public int text_len { get; set; }

    /// <summary>The decoded tokens.</summary>
    public List<string>? tokens { get; set; }

    /// <summary>The timestamps.</summary>
    public List<float>? timestamps { get; set; }
}
|
||||||
|
/// <summary>
/// Managed-side settings naming the model files of an online transducer.
/// Every path defaults to the empty string.
/// </summary>
public class OnlineTransducer
{
    /// <summary>Path of the encoder model file.</summary>
    public string EncoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the decoder model file.</summary>
    public string DecoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the joiner model file.</summary>
    public string JoinerFilename { get; set; } = string.Empty;
}
|
||||||
|
}
|
||||||
10
sherpa-onnx/csharp-api/SherpaOnnx.csproj
Normal file
10
sherpa-onnx/csharp-api/SherpaOnnx.csproj
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Build settings of the SherpaOnnx C# API project. -->
  <PropertyGroup>
    <!-- The project targets .NET 6. -->
    <TargetFramework>net6.0</TargetFramework>
    <!-- Implicit global using directives are turned on. -->
    <ImplicitUsings>enable</ImplicitUsings>
    <!-- Nullable reference type annotations and warnings are turned on. -->
    <Nullable>enable</Nullable>
    <!-- Code in `unsafe` contexts is allowed to compile. -->
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

</Project>
|
||||||
136
sherpa-onnx/csharp-api/offline-api.cpp
Normal file
136
sherpa-onnx/csharp-api/offline-api.cpp
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
// sherpa-onnx/csharp-api/offline-api.cpp
|
||||||
|
//
|
||||||
|
// Copyright (c) 2023 Manyeyes Corporation
|
||||||
|
|
||||||
|
#include "offline-api.h"
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/display.h"
|
||||||
|
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx
|
||||||
|
{
|
||||||
|
struct SherpaOnnxOfflineRecognizer {
|
||||||
|
sherpa_onnx::OfflineRecognizer* impl;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SherpaOnnxOfflineStream {
|
||||||
|
std::unique_ptr<sherpa_onnx::OfflineStream> impl;
|
||||||
|
explicit SherpaOnnxOfflineStream(std::unique_ptr<sherpa_onnx::OfflineStream> p)
|
||||||
|
: impl(std::move(p)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SherpaOnnxDisplay {
|
||||||
|
std::unique_ptr<sherpa_onnx::Display> impl;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Create an offline recognizer from a C-style configuration.
///
/// Exactly one model family is copied from `config`, selected in this order:
/// transducer (if its encoder_filename is non-empty), otherwise paraformer
/// (if its model is non-empty), otherwise nemo_ctc (if its model is
/// non-empty).
///
/// NULL string fields are treated as empty strings. A managed caller that
/// leaves a string field unassigned marshals it as NULL, which must never
/// reach strlen() or a std::string assignment.
///
/// @param config Recognizer configuration.
/// @return A pointer to be freed with DestroyOfflineRecognizer(), or nullptr
///         if `config` is nullptr.
SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(
    const SherpaOnnxOfflineRecognizerConfig* config) {
  if (config == nullptr) {
    return nullptr;
  }

  // Map NULL to "" so that a std::string can always be built from the field.
  const auto or_empty = [](const char* s) -> const char* {
    return s != nullptr ? s : "";
  };
  // Same test as strlen(s) > 0, but safe for NULL.
  const auto has_text = [](const char* s) -> bool {
    return s != nullptr && s[0] != '\0';
  };

  sherpa_onnx::OfflineRecognizerConfig recognizer_config;

  recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
  recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;

  const auto& model = config->model_config;
  if (has_text(model.transducer.encoder_filename)) {
    recognizer_config.model_config.transducer.encoder_filename =
        model.transducer.encoder_filename;
    recognizer_config.model_config.transducer.decoder_filename =
        or_empty(model.transducer.decoder_filename);
    recognizer_config.model_config.transducer.joiner_filename =
        or_empty(model.transducer.joiner_filename);
  } else if (has_text(model.paraformer.model)) {
    recognizer_config.model_config.paraformer.model = model.paraformer.model;
  } else if (has_text(model.nemo_ctc.model)) {
    recognizer_config.model_config.nemo_ctc.model = model.nemo_ctc.model;
  }

  recognizer_config.model_config.tokens = or_empty(model.tokens);
  recognizer_config.model_config.num_threads = model.num_threads;
  recognizer_config.model_config.debug = model.debug;

  recognizer_config.decoding_method = or_empty(config->decoding_method);

  // Build the implementation first: if its constructor throws, no wrapper
  // has been allocated yet, so nothing is leaked.
  auto* impl = new sherpa_onnx::OfflineRecognizer(recognizer_config);

  SherpaOnnxOfflineRecognizer* recognizer = new SherpaOnnxOfflineRecognizer;
  recognizer->impl = impl;

  return recognizer;
}
|
||||||
|
|
||||||
|
/// Create a new offline stream for the given recognizer.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @return A heap-allocated stream handle; free it with
///         DestroyOfflineStream().
SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(
    SherpaOnnxOfflineRecognizer* recognizer) {
  return new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
}
|
||||||
|
|
||||||
|
/// Feed audio samples to an offline stream.
///
/// The samples are handed to the stream directly. The previous version first
/// copied them into a temporary std::vector that was destroyed when this
/// function returned, so the copy gave no lifetime guarantee and only cost an
/// extra allocation.
///
/// @param stream       A pointer returned by CreateOfflineStream().
/// @param sample_rate  Sample rate of `samples`.
/// @param samples      Pointer to `samples_size` audio samples.
/// @param samples_size Number of elements in `samples`.
void __stdcall AcceptWaveform(
    SherpaOnnxOfflineStream* stream,
    int32_t sample_rate,
    const float* samples, int32_t samples_size) {
  stream->impl->AcceptWaveform(sample_rate, samples, samples_size);
}
|
||||||
|
|
||||||
|
/// Decode a single offline stream with the given recognizer.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param stream     A pointer returned by CreateOfflineStream().
void __stdcall DecodeOfflineStream(
    SherpaOnnxOfflineRecognizer* recognizer,
    SherpaOnnxOfflineStream* stream) {
  sherpa_onnx::OfflineStream* raw_stream = stream->impl.get();
  recognizer->impl->DecodeStream(raw_stream);
}
|
||||||
|
|
||||||
|
/// Decode several offline streams with one call.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param streams    Array of `n` pointers, each returned by
///                   CreateOfflineStream().
/// @param n          Number of elements in `streams`.
void __stdcall DecodeMultipleOfflineStreams(
    SherpaOnnxOfflineRecognizer* recognizer,
    SherpaOnnxOfflineStream** streams, int32_t n) {
  // The recognizer takes raw OfflineStream pointers, so unwrap each handle.
  std::vector<sherpa_onnx::OfflineStream*> raw_streams(n);
  int32_t index = 0;
  for (auto& slot : raw_streams) {
    slot = streams[index++]->impl.get();
  }
  recognizer->impl->DecodeStreams(raw_streams.data(), n);
}
|
||||||
|
|
||||||
|
/// Copy the recognition result of a stream into a newly allocated C struct.
///
/// The text is copied into a `new char[]` buffer with a terminating 0 byte;
/// `text_len` is the text size without that terminator.
///
/// @param stream A pointer returned by CreateOfflineStream().
/// @return A pointer to be freed with DestroyOfflineRecognizerResult().
SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(
    SherpaOnnxOfflineStream* stream) {
  const sherpa_onnx::OfflineRecognitionResult decoded =
      stream->impl->GetResult();
  const auto& transcript = decoded.text;
  const auto length = transcript.size();

  SherpaOnnxOfflineRecognizerResult* out =
      new SherpaOnnxOfflineRecognizerResult;
  out->text = new char[length + 1];
  std::copy(transcript.begin(), transcript.end(), out->text);
  out->text[length] = '\0';
  out->text_len = length;
  return out;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Free a pointer returned by CreateOfflineRecognizer()
|
||||||
|
///
|
||||||
|
/// @param p A pointer returned by CreateOfflineRecognizer()
|
||||||
|
void __stdcall DestroyOfflineRecognizer(
|
||||||
|
SherpaOnnxOfflineRecognizer* recognizer) {
|
||||||
|
delete recognizer->impl;
|
||||||
|
delete recognizer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Destory an offline stream.
|
||||||
|
///
|
||||||
|
/// @param stream A pointer returned by CreateOfflineStream()
|
||||||
|
void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) {
|
||||||
|
delete stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Destroy the pointer returned by GetOfflineStreamResult().
|
||||||
|
///
|
||||||
|
/// @param r A pointer returned by GetOfflineStreamResult()
|
||||||
|
void __stdcall DestroyOfflineRecognizerResult(
|
||||||
|
SherpaOnnxOfflineRecognizerResult* r) {
|
||||||
|
delete r->text;
|
||||||
|
delete r;
|
||||||
|
}
|
||||||
|
}// namespace sherpa_onnx
|
||||||
122
sherpa-onnx/csharp-api/offline-api.h
Normal file
122
sherpa-onnx/csharp-api/offline-api.h
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
// sherpa-onnx/csharp-api/offline-api.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2023 Manyeyes Corporation
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
|
||||||
|
namespace sherpa_onnx
|
||||||
|
{
|
||||||
|
/// Model files of an offline transducer.
///
/// Pre-trained models can be downloaded from
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// which provides encoder-xxx.onnx, decoder-xxx.onnx, joiner-xxx.onnx and
/// tokens.txt.
typedef struct SherpaOnnxOfflineTransducer {
  const char *encoder_filename;
  const char *decoder_filename;
  const char *joiner_filename;
} SherpaOnnxOfflineTransducer;
|
||||||
|
|
||||||
|
/// Model file of an offline paraformer model.
typedef struct SherpaOnnxOfflineParaformer {
  const char *model;
} SherpaOnnxOfflineParaformer;
|
||||||
|
|
||||||
|
/// Model file of an offline NeMo CTC model.
typedef struct SherpaOnnxOfflineNemoEncDecCtc {
  const char *model;
} SherpaOnnxOfflineNemoEncDecCtc;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct SherpaOnnxOfflineModelConfig {
|
||||||
|
SherpaOnnxOfflineTransducer transducer;
|
||||||
|
SherpaOnnxOfflineParaformer paraformer;
|
||||||
|
SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;
|
||||||
|
const char* tokens;
|
||||||
|
const int32_t num_threads;
|
||||||
|
const bool debug;
|
||||||
|
} SherpaOnnxOfflineModelConfig;
|
||||||
|
|
||||||
|
/// Feature extraction settings.
/// It expects 16 kHz 16-bit single channel wave format.
typedef struct SherpaOnnxFeatureConfig {
  /// Sample rate of the input data. MUST match the one expected by the
  /// model. For instance, it should be 16000 for models provided by us.
  int32_t sample_rate;

  /// Feature dimension of the model.
  /// For instance, it should be 80 for models provided by us.
  int32_t feature_dim;
} SherpaOnnxFeatureConfig;
|
||||||
|
|
||||||
|
typedef struct SherpaOnnxOfflineRecognizerConfig {
|
||||||
|
SherpaOnnxFeatureConfig feat_config;
|
||||||
|
SherpaOnnxOfflineModelConfig model_config;
|
||||||
|
|
||||||
|
/// Possible values are: greedy_search, modified_beam_search
|
||||||
|
const char* decoding_method;
|
||||||
|
|
||||||
|
} SherpaOnnxOfflineRecognizerConfig;
|
||||||
|
|
||||||
|
/// Result returned by GetOfflineStreamResult().
typedef struct SherpaOnnxOfflineRecognizerResult {
  // Recognition results.
  // For English, it consists of space separated words.
  // For Chinese, it consists of Chinese words without spaces.
  char *text;

  // Length of `text`.
  int text_len;

  // Not exposed yet -- decoded results at the token level.
  // For instance, for BPE-based models it consists of a list of BPE tokens.
  // std::vector<std::string> tokens;

  // Not exposed yet -- timestamps.size() == tokens.size()
  // timestamps[i] records the time in seconds when tokens[i] is decoded.
  // std::vector<float> timestamps;
} SherpaOnnxOfflineRecognizerResult;
|
||||||
|
|
||||||
|
/// Note: OfflineRecognizer here means NonStreamingRecognizer.
/// It does not need to access the Internet during recognition.
/// Everything is run locally.
|
||||||
|
typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;
|
||||||
|
|
||||||
|
typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer(
|
||||||
|
const SherpaOnnxOfflineRecognizerConfig * config);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
SherpaOnnxOfflineStream * __stdcall CreateOfflineStream(
|
||||||
|
SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
void __stdcall AcceptWaveform(
|
||||||
|
SherpaOnnxOfflineStream * stream, int32_t sample_rate,
|
||||||
|
const float* samples, int32_t samples_size);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
void __stdcall DecodeOfflineStream(
|
||||||
|
SherpaOnnxOfflineRecognizer * recognizer,
|
||||||
|
SherpaOnnxOfflineStream * stream);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
void __stdcall DecodeMultipleOfflineStreams(
|
||||||
|
SherpaOnnxOfflineRecognizer * recognizer,
|
||||||
|
SherpaOnnxOfflineStream * *streams, int32_t n);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult(
|
||||||
|
SherpaOnnxOfflineStream * stream);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
void __stdcall DestroyOfflineRecognizer(
|
||||||
|
SherpaOnnxOfflineRecognizer * recognizer);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
void __stdcall DestroyOfflineStream(
|
||||||
|
SherpaOnnxOfflineStream * stream);
|
||||||
|
|
||||||
|
extern "C" __declspec(dllexport)
|
||||||
|
void __stdcall DestroyOfflineRecognizerResult(
|
||||||
|
SherpaOnnxOfflineRecognizerResult * r);
|
||||||
|
}// namespace sherpa_onnx
|
||||||
Reference in New Issue
Block a user