diff --git a/csharp-api-examples/OfflineDecodeFiles.cs b/csharp-api-examples/OfflineDecodeFiles.cs new file mode 100644 index 00000000..177200a4 --- /dev/null +++ b/csharp-api-examples/OfflineDecodeFiles.cs @@ -0,0 +1,255 @@ +// See https://aka.ms/new-console-template for more information +// Copyright (c) 2023 by manyeyes +using SherpaOnnx; +/// Please refer to +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +/// to download pre-trained models. That is, you can find encoder-xxx.onnx +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct +/// from there. + +/// download model eg: +/// (The directory where the application runs) +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory +/// cd /path/to +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01 +/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512 + +/// NuGet for sherpa-onnx +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx + +// transducer Usage: +/* + .\SherpaOnnx.Examples.exe ` + --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` + --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` + --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` + --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` + --num-threads=2 ` + --decoding-method=greedy_search ` + --debug=false ` + ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav + */ + +// paraformer Usage: +/* + .\SherpaOnnx.Examples.exe ` + --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` + --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` + --num-threads=2 ` + --decoding-method=greedy_search ` + 
--debug=false ` + ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav + */ + +// nemo Usage: +/* + .\SherpaOnnx.Examples.exe ` + --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ` + --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx ` + --num-threads=2 ` + --decoding-method=greedy_search ` + --debug=false ` + ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav + */ + + +internal class OfflineDecodeFiles +{ + static void Main(string[] args) + { + string usage = @" +----------------------------- +transducer Usage: + --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` + --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` + --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` + --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` + --num-threads=2 ` + --decoding-method=greedy_search ` + --debug=false ` + ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav + +paraformer Usage: + --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` + --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` + --num-threads=2 ` + --decoding-method=greedy_search ` + --debug=false ` + ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav + +nemo Usage: + --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ` + --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx ` + --num-threads=2 ` + --decoding-method=greedy_search ` + --debug=false ` + ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav +----------------------------- +"; + if (args.Length == 0) + { + System.Console.WriteLine("Please enter the correct parameters:"); + System.Console.WriteLine(usage); + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + //args = Console.ReadLine().Split(" "); + while (true)
+ { + string input = Console.ReadLine(); + sb.AppendLine(input); + if (Console.ReadKey().Key == ConsoleKey.Enter) + break; + } + args = sb.ToString().Split("\r\n"); + } + Console.WriteLine("Started!\n"); + string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; + List wavFiles = new List(); + Dictionary argsDict = GetDict(args, applicationBase, ref wavFiles); + string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; + string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; + string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; + string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; + string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; + string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; + string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; + string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; + string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; + + OfflineTransducer offlineTransducer = new OfflineTransducer(); + offlineTransducer.EncoderFilename = encoder; + offlineTransducer.DecoderFilename = decoder; + offlineTransducer.JoinerFilename = joiner; + + OfflineParaformer offlineParaformer = new OfflineParaformer(); + offlineParaformer.Model = paraformer; + + OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); + offlineNemoEncDecCtc.Model = nemo_ctc; + + int numThreads = 0; + int.TryParse(num_threads, out numThreads); + bool isDebug = false; + bool.TryParse(debug, out isDebug); + + string decodingMethod = string.IsNullOrEmpty(decoding_method) ? 
"" : decoding_method; + + if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) + && string.IsNullOrEmpty(paraformer) + && string.IsNullOrEmpty(nemo_ctc)) + { + Console.WriteLine("Please specify at least one model"); + Console.WriteLine(usage); + } + // batch decode + TimeSpan total_duration = TimeSpan.Zero; + TimeSpan start_time = TimeSpan.Zero; + TimeSpan end_time = TimeSpan.Zero; + List results = new List(); + if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) + { + OfflineRecognizer offlineRecognizer = new OfflineRecognizer( + offlineTransducer, + tokens, + num_threads: numThreads, + debug: isDebug, + decoding_method: decodingMethod); + List samplesList = new List(); + foreach (string wavFile in wavFiles) + { + TimeSpan duration = TimeSpan.Zero; + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); + samplesList.Add(samples); + total_duration += duration; + } + OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); + start_time = new TimeSpan(DateTime.Now.Ticks); + offlineRecognizer.DecodeMultipleOfflineStreams(streams); + results = offlineRecognizer.GetResults(streams); + end_time = new TimeSpan(DateTime.Now.Ticks); + } + else if (!string.IsNullOrEmpty(paraformer)) + { + OfflineRecognizer offlineRecognizer = new OfflineRecognizer( + offlineParaformer, + tokens, + num_threads: numThreads, + debug: isDebug, + decoding_method: decodingMethod); + List samplesList = new List(); + foreach (string wavFile in wavFiles) + { + TimeSpan duration = TimeSpan.Zero; + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); + samplesList.Add(samples); + total_duration += duration; + } + OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); + start_time = new TimeSpan(DateTime.Now.Ticks); + offlineRecognizer.DecodeMultipleOfflineStreams(streams); + results = offlineRecognizer.GetResults(streams); + 
end_time = new TimeSpan(DateTime.Now.Ticks); + } + else if (!string.IsNullOrEmpty(nemo_ctc)) + { + OfflineRecognizer offlineRecognizer = new OfflineRecognizer( + offlineNemoEncDecCtc, + tokens, + num_threads: numThreads, + debug: isDebug, + decoding_method: decodingMethod); + List samplesList = new List(); + foreach (string wavFile in wavFiles) + { + TimeSpan duration = TimeSpan.Zero; + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); + samplesList.Add(samples); + total_duration += duration; + } + OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); + start_time = new TimeSpan(DateTime.Now.Ticks); + offlineRecognizer.DecodeMultipleOfflineStreams(streams); + results = offlineRecognizer.GetResults(streams); + end_time = new TimeSpan(DateTime.Now.Ticks); + } + + foreach (var item in results.Zip(wavFiles)) + { + Console.WriteLine("wavFile:{0}", item.Second); + Console.WriteLine("text:{0}", item.First.text.ToLower()); + Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString()); + } + + double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; + double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; + Console.WriteLine("num_threads:{0}", num_threads); + Console.WriteLine("decoding_method:{0}", decodingMethod); + Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); + Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); + Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); + + Console.WriteLine("End!"); + } + + static Dictionary GetDict(string[] args, string applicationBase, ref List wavFiles) + { + Dictionary argsDict = new Dictionary(); + foreach (string input in args) + { + string[] ss = input.Split("="); + if (ss.Length == 1) + { + if (!string.IsNullOrEmpty(ss[0])) + { + wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); + } + } + else + { + 
argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); + } + } + return argsDict; + } +} \ No newline at end of file diff --git a/csharp-api-examples/OnlineDecodeFile.cs b/csharp-api-examples/OnlineDecodeFile.cs new file mode 100644 index 00000000..20a027e0 --- /dev/null +++ b/csharp-api-examples/OnlineDecodeFile.cs @@ -0,0 +1,171 @@ +// See https://aka.ms/new-console-template for more information +// Copyright (c) 2023 by manyeyes +using SherpaOnnx; +/// Please refer to +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +/// to download pre-trained models. That is, you can find encoder-xxx.onnx +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct +/// from there. + +/// download model eg: +/// (The directory where the application runs) +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory +/// cd /path/to +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + +/// NuGet for sherpa-onnx +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx + +// transducer Usage: +/* + .\SherpaOnnx.Examples.exe ` + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` + --num-threads=2 ` + --decoding-method=modified_beam_search ` + --debug=false ` + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav + */ + +internal class OnlineDecodeFile +{ + static void Main(string[] args) + { + string usage = @" +----------------------------- 
+transducer Usage: + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` + --num-threads=2 ` + --decoding-method=modified_beam_search ` + --debug=false ` + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav +----------------------------- +"; + if (args.Length == 0) + { + System.Console.WriteLine("Please enter the correct parameters:"); + System.Console.WriteLine(usage); + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + //args = Console.ReadLine().Split(" "); + while (true) + { + string input = Console.ReadLine(); + sb.AppendLine(input); + if (Console.ReadKey().Key == ConsoleKey.Enter) + break; + } + args = sb.ToString().Split("\r\n"); + } + Console.WriteLine("Started!\n"); + string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; + List wavFiles = new List(); + Dictionary argsDict = GetDict(args, applicationBase, ref wavFiles); + string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; + string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; + string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; + string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; + string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; + string tokens = argsDict.ContainsKey("tokens") ? 
Path.Combine(applicationBase, argsDict["tokens"]) : ""; + string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; + string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; + string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; + + OfflineTransducer offlineTransducer = new OfflineTransducer(); + offlineTransducer.EncoderFilename = encoder; + offlineTransducer.DecoderFilename = decoder; + offlineTransducer.JoinerFilename = joiner; + + OfflineParaformer offlineParaformer = new OfflineParaformer(); + offlineParaformer.Model = paraformer; + + OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); + offlineNemoEncDecCtc.Model = nemo_ctc; + + int numThreads = 0; + int.TryParse(num_threads, out numThreads); + bool isDebug = false; + bool.TryParse(debug, out isDebug); + + string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; + + if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) + && string.IsNullOrEmpty(paraformer) + && string.IsNullOrEmpty(nemo_ctc)) + { + Console.WriteLine("Please specify at least one model"); + Console.WriteLine(usage); + } + // batch decode + TimeSpan total_duration = TimeSpan.Zero; + TimeSpan start_time = TimeSpan.Zero; + TimeSpan end_time = TimeSpan.Zero; + List results = new List(); + if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) + { + OnlineTransducer onlineTransducer = new OnlineTransducer(); + onlineTransducer.EncoderFilename = encoder; + onlineTransducer.DecoderFilename = decoder; + onlineTransducer.JoinerFilename = joiner; + //test online + OnlineRecognizer onlineRecognizer = new OnlineRecognizer( + onlineTransducer, + tokens, + num_threads: numThreads, + debug: isDebug, + decoding_method: decodingMethod); + foreach (string wavFile in wavFiles) + { + TimeSpan duration = TimeSpan.Zero; + List 
samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration); + OnlineStream stream = onlineRecognizer.CreateStream(); + /* Time each file's decode on its own and accumulate it in end_time; start_time stays TimeSpan.Zero, so the subtraction below yields the decode time summed over all files (file loading excluded) instead of only the last file's. */ TimeSpan decode_start = new TimeSpan(DateTime.Now.Ticks); + for (int i = 0; i < samplesList.Count; i++) + { + onlineRecognizer.AcceptWaveForm(stream, 16000, samplesList[i]); + onlineRecognizer.DecodeStream(stream); + OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream); + Console.WriteLine(result_on.text); + } + end_time += new TimeSpan(DateTime.Now.Ticks) - decode_start; + total_duration += duration; + } + } + double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; + double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; + Console.WriteLine("num_threads:{0}", num_threads); + Console.WriteLine("decoding_method:{0}", decodingMethod); + Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); + Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); + Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); + + Console.WriteLine("End!"); + } + + static Dictionary GetDict(string[] args, string applicationBase, ref List wavFiles) + { + Dictionary argsDict = new Dictionary(); + foreach (string input in args) + { + string[] ss = input.Split("="); + if (ss.Length == 1) + { + if (!string.IsNullOrEmpty(ss[0])) + { + wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); + } + } + else + { + argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); + } + } + return argsDict; + } + +} \ No newline at end of file diff --git a/csharp-api-examples/OnlineDecodeFiles.cs b/csharp-api-examples/OnlineDecodeFiles.cs new file mode 100644 index 00000000..66f5492f --- /dev/null +++ b/csharp-api-examples/OnlineDecodeFiles.cs @@ -0,0 +1,221 @@ +// See https://aka.ms/new-console-template for more information +// Copyright (c) 2023 by manyeyes +using
SherpaOnnx; +/// Please refer to +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +/// to download pre-trained models. That is, you can find encoder-xxx.onnx +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct +/// from there. + +/// download model eg: +/// (The directory where the application runs) +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory +/// cd /path/to +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + +/// NuGet for sherpa-onnx +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx + +// transducer Usage: +/* + .\SherpaOnnx.Examples.exe ` + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` + --num-threads=2 ` + --decoding-method=modified_beam_search ` + --debug=false ` + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav + */ + +internal class OnlineDecodeFiles +{ + static void Main(string[] args) + { + string usage = @" +----------------------------- +transducer Usage: + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` + 
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` + --num-threads=2 ` + --decoding-method=modified_beam_search ` + --debug=false ` + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav +----------------------------- +"; + if (args.Length == 0) + { + System.Console.WriteLine("Please enter the correct parameters:"); + System.Console.WriteLine(usage); + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + //args = Console.ReadLine().Split(" "); + while (true) + { + string input = Console.ReadLine(); + sb.AppendLine(input); + if (Console.ReadKey().Key == ConsoleKey.Enter) + break; + } + args = sb.ToString().Split("\r\n"); + } + Console.WriteLine("Started!\n"); + string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; + List wavFiles = new List(); + Dictionary argsDict = GetDict(args, applicationBase, ref wavFiles); + string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; + string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; + string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; + string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; + string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; + string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; + string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; + string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; + string debug = argsDict.ContainsKey("debug") ? 
argsDict["debug"] : ""; + + OfflineTransducer offlineTransducer = new OfflineTransducer(); + offlineTransducer.EncoderFilename = encoder; + offlineTransducer.DecoderFilename = decoder; + offlineTransducer.JoinerFilename = joiner; + + OfflineParaformer offlineParaformer = new OfflineParaformer(); + offlineParaformer.Model = paraformer; + + OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); + offlineNemoEncDecCtc.Model = nemo_ctc; + + int numThreads = 0; + int.TryParse(num_threads, out numThreads); + bool isDebug = false; + bool.TryParse(debug, out isDebug); + + string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; + + if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) + && string.IsNullOrEmpty(paraformer) + && string.IsNullOrEmpty(nemo_ctc)) + { + Console.WriteLine("Please specify at least one model"); + Console.WriteLine(usage); + } + // batch decode + TimeSpan total_duration = TimeSpan.Zero; + TimeSpan start_time = TimeSpan.Zero; + TimeSpan end_time = TimeSpan.Zero; + List results = new List(); + if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) + { + OnlineTransducer onlineTransducer = new OnlineTransducer(); + onlineTransducer.EncoderFilename = encoder; + onlineTransducer.DecoderFilename = decoder; + onlineTransducer.JoinerFilename = joiner; + //test online + OnlineRecognizer onlineRecognizer = new OnlineRecognizer( + onlineTransducer, + tokens, + num_threads: numThreads, + debug: isDebug, + decoding_method: decodingMethod); + List samplesList = new List(); + foreach (string wavFile in wavFiles) + { + TimeSpan duration = TimeSpan.Zero; + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); + samplesList.Add(samples); + total_duration += duration; + } + start_time = new TimeSpan(DateTime.Now.Ticks); + List streams = new List(); + foreach (float[] samples in samplesList) + { + OnlineStream 
stream = onlineRecognizer.CreateStream(); + onlineRecognizer.AcceptWaveForm(stream, 16000, samples); + streams.Add(stream); + onlineRecognizer.InputFinished(stream); + } + onlineRecognizer.DecodeMultipleStreams(streams); + results = onlineRecognizer.GetResults(streams); + foreach (OnlineRecognizerResultEntity result in results) + { + Console.WriteLine(result.text); + } + end_time = new TimeSpan(DateTime.Now.Ticks); + } + + + foreach (var item in results.Zip(wavFiles)) + { + Console.WriteLine("wavFile:{0}", item.Second); + Console.WriteLine("text:{0}", item.First.text.ToLower()); + Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString()); + } + + double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; + double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; + Console.WriteLine("num_threads:{0}", num_threads); + Console.WriteLine("decoding_method:{0}", decodingMethod); + Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); + Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); + Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); + + Console.WriteLine("End!"); + } + + public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List wavFiles, ref TimeSpan total_duration) + { + OnlineTransducer onlineTransducer = new OnlineTransducer(); + onlineTransducer.EncoderFilename = encoder; + onlineTransducer.DecoderFilename = decoder; + onlineTransducer.JoinerFilename = joiner; + //test online + OnlineRecognizer onlineRecognizer = new OnlineRecognizer( + onlineTransducer, + tokens, + num_threads: numThreads, + debug: isDebug, + decoding_method: decodingMethod); + List samplesList = new List(); + foreach (string wavFile in wavFiles) + { + TimeSpan duration = TimeSpan.Zero; + float[] samples = AudioHelper.GetFileSamples(wavFile, ref 
duration); + samplesList.Add(samples); + total_duration += duration; + } + TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks); + List streams = onlineRecognizer.CreateStreams(samplesList); + onlineRecognizer.DecodeMultipleStreams(streams); + List results = onlineRecognizer.GetResults(streams); + foreach (OnlineRecognizerResultEntity result in results) + { + Console.WriteLine(result.text); + } + TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks); + } + + static Dictionary GetDict(string[] args, string applicationBase, ref List wavFiles) + { + Dictionary argsDict = new Dictionary(); + foreach (string input in args) + { + string[] ss = input.Split("="); + if (ss.Length == 1) + { + if (!string.IsNullOrEmpty(ss[0])) + { + wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); + } + } + else + { + argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); + } + } + return argsDict; + } +} \ No newline at end of file diff --git a/csharp-api-examples/README.md b/csharp-api-examples/README.md new file mode 100644 index 00000000..2d2eded7 --- /dev/null +++ b/csharp-api-examples/README.md @@ -0,0 +1,9 @@ +#ProjectReference csharp-api +`` +The location of the 'SherpaOnnx' file is ../sherpa-onnx/csharp-api. +This C # API is cross platform and you can compile it yourself in Windows, Mac OS, and Linux environments. + +------------ +Alternatively, install sherpaonnx through nuget. 
+#NuGet for sherpa-onnx +PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx \ No newline at end of file diff --git a/csharp-api-examples/Utils/AudioHelper.cs b/csharp-api-examples/Utils/AudioHelper.cs new file mode 100644 index 00000000..c70065c8 --- /dev/null +++ b/csharp-api-examples/Utils/AudioHelper.cs @@ -0,0 +1,67 @@ +using NAudio.Wave; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +/// +/// audio processing +/// Copyright (c) 2023 by manyeyes +/// +public class AudioHelper +{ + public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration) + { + if (!File.Exists(wavFilePath)) + { + Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath); + return new float[1]; + } + /* using declaration: the reader (and its file handle) is disposed when the method returns */ using AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath); + byte[] datas = new byte[_audioFileReader.Length]; + _audioFileReader.Read(datas, 0, datas.Length); + duration = _audioFileReader.TotalTime; + float[] wavdata = new float[datas.Length / sizeof(float)]; + Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length); + return wavdata; + } + + public static List GetChunkSamplesList(string wavFilePath, ref TimeSpan duration) + { + List wavdatas = new List(); + if (!File.Exists(wavFilePath)) + { + Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath); + wavdatas.Add(new float[1]); + return wavdatas; + } + /* using declaration: the reader (and its file handle) is disposed when the method returns */ using AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath); + byte[] datas = new byte[_audioFileReader.Length]; + int chunkSize = 16000;// datas.Length / sizeof(float); + int chunkNum = (int)Math.Ceiling((double)datas.Length / chunkSize); + for (int i = 0; i < chunkNum; i++) + { + int offset = 0; + int dataCount = 0; + if (Math.Abs(datas.Length - i * chunkSize) > chunkSize) + { + offset = i * chunkSize; + dataCount = chunkSize; + } + else + { + offset = i * chunkSize; + dataCount = datas.Length - i * chunkSize;
+ } + _audioFileReader.Read(datas, offset, dataCount); + float[] wavdata = new float[chunkSize / sizeof(float)]; + Buffer.BlockCopy(datas, offset, wavdata, 0, dataCount); + wavdatas.Add(wavdata); + + } + /* Add the file's play time exactly once; adding it inside the chunk loop multiplied the reported duration by the number of chunks. */ duration += _audioFileReader.TotalTime; + return wavdatas; + } +} diff --git a/csharp-api-examples/sherpa-onnx.csproj b/csharp-api-examples/sherpa-onnx.csproj new file mode 100644 index 00000000..c00f0948 --- /dev/null +++ b/csharp-api-examples/sherpa-onnx.csproj @@ -0,0 +1,20 @@ + + + + Exe + net6.0 + sherpa_onnx + enable + enable + OnlineDecodeFiles + + + + + + + + + + + diff --git a/sherpa-onnx/csharp-api/SherpaOnnx.cs b/sherpa-onnx/csharp-api/SherpaOnnx.cs new file mode 100644 index 00000000..4cb25839 --- /dev/null +++ b/sherpa-onnx/csharp-api/SherpaOnnx.cs @@ -0,0 +1,872 @@ +using System.Runtime.InteropServices; +using System.Diagnostics; + +namespace SherpaOnnx +{ + /// + /// online recognizer package + /// Copyright (c) 2023 by manyeyes + /// + public class OnlineBase : IDisposable + { + public void Dispose() + { + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + protected virtual void Dispose(bool disposing) + { + /* Native handles are released on both the explicit Dispose() path and the finalizer path; Dispose() suppresses the finalizer, so skipping the release when disposing is true leaked them for good. The _disposed flag keeps repeated calls harmless. */ if (!this._disposed) + { + if (_onlineRecognizerResult != IntPtr.Zero) + { + SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult); + _onlineRecognizerResult = IntPtr.Zero; + } + if (_onlineStream.impl != IntPtr.Zero) + { + SherpaOnnxSharp.DestroyOnlineStream(_onlineStream); + _onlineStream.impl = IntPtr.Zero; + } + if (_onlineRecognizer.impl != IntPtr.Zero) + { + SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer); + _onlineRecognizer.impl = IntPtr.Zero; + } + this._disposed = true; + } + } + ~OnlineBase() + { + Dispose(disposing: false); + } + internal SherpaOnnxOnlineStream _onlineStream; + internal IntPtr _onlineRecognizerResult; + internal SherpaOnnxOnlineRecognizer _onlineRecognizer; + internal bool _disposed = false; + } + public class OnlineStream : OnlineBase + { + internal OnlineStream(SherpaOnnxOnlineStream
onlineStream) + { + this._onlineStream = onlineStream; + } + protected override void Dispose(bool disposing) + { + /* Release the native stream on the explicit Dispose() path as well as from the finalizer; _disposed guards against a second release. */ if (!this._disposed) + { + SherpaOnnxSharp.DestroyOnlineStream(_onlineStream); + _onlineStream.impl = IntPtr.Zero; + this._disposed = true; + base.Dispose(disposing); + } + } + } + public class OnlineRecognizerResult : OnlineBase + { + internal OnlineRecognizerResult(IntPtr onlineRecognizerResult) + { + this._onlineRecognizerResult = onlineRecognizerResult; + } + protected override void Dispose(bool disposing) + { + /* Release the native result on the explicit Dispose() path as well as from the finalizer; _disposed guards against a second release. */ if (!this._disposed) + { + SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult); + _onlineRecognizerResult = IntPtr.Zero; + this._disposed = true; + base.Dispose(disposing); + } + } + } + public class OnlineRecognizer : OnlineBase + where T : class, new() + { + + public OnlineRecognizer(T t, + string tokensFilePath, string decoding_method = "greedy_search", + int sample_rate = 16000, int feature_dim = 80, + int num_threads = 2, bool debug = false, int max_active_paths = 4, + int enable_endpoint=0,int rule1_min_trailing_silence=0, + int rule2_min_trailing_silence=0,int rule3_min_utterance_length=0) + { + SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer(); + SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig(); + if (t is not null && t.GetType() == typeof(OnlineTransducer)) + { + OnlineTransducer?
onlineTransducer = t as OnlineTransducer; +#pragma warning disable CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(onlineTransducer.DecoderFilename) + && File.Exists(onlineTransducer.EncoderFilename) + && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model"); +#pragma warning restore CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); + Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); + transducer.encoder_filename = onlineTransducer.EncoderFilename; + transducer.decoder_filename = onlineTransducer.DecoderFilename; + transducer.joiner_filename = onlineTransducer.JoinerFilename; + } + + model_config.transducer = transducer; + model_config.num_threads = num_threads; + model_config.debug = debug; + model_config.tokens = tokensFilePath; + + SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig(); + feat_config.sample_rate = sample_rate; + feat_config.feature_dim = feature_dim; + + SherpaOnnxOnlineRecognizerConfig sherpaOnnxOnlineRecognizerConfig; + sherpaOnnxOnlineRecognizerConfig.decoding_method = decoding_method; + sherpaOnnxOnlineRecognizerConfig.feat_config = feat_config; + sherpaOnnxOnlineRecognizerConfig.model_config = model_config; + sherpaOnnxOnlineRecognizerConfig.max_active_paths = max_active_paths; + //endpoint + sherpaOnnxOnlineRecognizerConfig.enable_endpoint = enable_endpoint; + sherpaOnnxOnlineRecognizerConfig.rule1_min_trailing_silence = rule1_min_trailing_silence; + sherpaOnnxOnlineRecognizerConfig.rule2_min_trailing_silence = rule2_min_trailing_silence; + sherpaOnnxOnlineRecognizerConfig.rule3_min_utterance_length = rule3_min_utterance_length; + + _onlineRecognizer = + SherpaOnnxSharp.CreateOnlineRecognizer(sherpaOnnxOnlineRecognizerConfig); + } + internal OnlineStream CreateOnlineStream() + { + SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer); + return new OnlineStream(stream); + } + public void InputFinished(OnlineStream 
stream) + { + SherpaOnnxSharp.InputFinished(stream._onlineStream); + } + public List CreateStreams(List samplesList) + { + int batch_size = samplesList.Count; + List streams = new List(); + for (int i = 0; i < batch_size; i++) + { + OnlineStream stream = CreateOnlineStream(); + AcceptWaveform(stream._onlineStream, 16000, samplesList[i]); + InputFinished(stream); + streams.Add(stream); + } + return streams; + } + public OnlineStream CreateStream() + { + OnlineStream stream = CreateOnlineStream(); + return stream; + } + internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples) + { + SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length); + } + public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples) + { + AcceptWaveform(stream._onlineStream, sample_rate, samples); + } + internal IntPtr GetStreamsIntPtr(OnlineStream[] streams) + { + int streams_len = streams.Length; + int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream)); + IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len); + unsafe + { + byte* ptrbds = (byte*)(streamsIntPtr.ToPointer()); + for (int i = 0; i < streams_len; i++, ptrbds += (size)) + { + IntPtr streamIntptr = new IntPtr(ptrbds); + Marshal.StructureToPtr(streams[i]._onlineStream, streamIntptr, false); + } + + } + return streamsIntPtr; + } + internal bool IsReady(OnlineStream stream) + { + return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0; + } + public void DecodeMultipleStreams(List streams) + { + while (true) + { + List streamList = new List(); + foreach (OnlineStream stream in streams) + { + if (IsReady(stream)) + { + streamList.Add(stream); + } + } + if (streamList.Count == 0) + { + break; + } + OnlineStream[] streamsBatch = new OnlineStream[streamList.Count]; + for (int i = 0; i < streamsBatch.Length; i++) + { + streamsBatch[i] = streamList[i]; + } + streamList.Clear(); + IntPtr streamsIntPtr = 
GetStreamsIntPtr(streamsBatch); + SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, streamsBatch.Length); + Marshal.FreeHGlobal(streamsIntPtr); + } + } + public void DecodeStream(OnlineStream stream) + { + while (IsReady(stream)) + { + SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream); + } + } + internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream) + { + IntPtr result_ip = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream); + OnlineRecognizerResult onlineRecognizerResult = new OnlineRecognizerResult(result_ip); +#pragma warning disable CS8605 // 取消装箱可能为 null 的值。 + SherpaOnnxOnlineRecognizerResult result = + (SherpaOnnxOnlineRecognizerResult)Marshal.PtrToStructure( + onlineRecognizerResult._onlineRecognizerResult, typeof(SherpaOnnxOnlineRecognizerResult)); +#pragma warning restore CS8605 // 取消装箱可能为 null 的值。 + +#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 + string text = Marshal.PtrToStringAnsi(result.text); +#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 + OnlineRecognizerResultEntity onlineRecognizerResultEntity = + new OnlineRecognizerResultEntity(); + onlineRecognizerResultEntity.text = text; + onlineRecognizerResultEntity.text_len = result.text_len; + + return onlineRecognizerResultEntity; + } + public OnlineRecognizerResultEntity GetResult(OnlineStream stream) + { + OnlineRecognizerResultEntity result = GetResult(stream._onlineStream); + return result; + } + public List GetResults(List streams) + { + List results = new List(); + foreach (OnlineStream stream in streams) + { + OnlineRecognizerResultEntity onlineRecognizerResultEntity = GetResult(stream._onlineStream); + results.Add(onlineRecognizerResultEntity); + } + return results; + } + protected override void Dispose(bool disposing) + { + if (!disposing) + { + SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer); + _onlineRecognizer.impl = IntPtr.Zero; + 
this._disposed = true; + base.Dispose(); + } + } + } + public class OfflineBase : IDisposable + { + public void Dispose() + { + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + protected virtual void Dispose(bool disposing) + { + if (!disposing) + { + if (_offlineRecognizerResult != IntPtr.Zero) + { + SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult); + _offlineRecognizerResult = IntPtr.Zero; + } + if (_offlineStream.impl != IntPtr.Zero) + { + SherpaOnnxSharp.DestroyOfflineStream(_offlineStream); + _offlineStream.impl = IntPtr.Zero; + } + if (_offlineRecognizer.impl != IntPtr.Zero) + { + SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer); + _offlineRecognizer.impl = IntPtr.Zero; + } + this._disposed = true; + } + } + ~OfflineBase() + { + Dispose(this._disposed); + } + internal SherpaOnnxOfflineStream _offlineStream; + internal IntPtr _offlineRecognizerResult; + internal SherpaOnnxOfflineRecognizer _offlineRecognizer; + internal bool _disposed = false; + } + public class OfflineStream : OfflineBase + { + internal OfflineStream(SherpaOnnxOfflineStream offlineStream) + { + this._offlineStream = offlineStream; + } + + protected override void Dispose(bool disposing) + { + if (!disposing) + { + SherpaOnnxSharp.DestroyOfflineStream(_offlineStream); + _offlineStream.impl = IntPtr.Zero; + this._disposed = true; + base.Dispose(); + } + } + } + public class OfflineRecognizerResult : OfflineBase + { + internal OfflineRecognizerResult(IntPtr offlineRecognizerResult) + { + this._offlineRecognizerResult = offlineRecognizerResult; + } + protected override void Dispose(bool disposing) + { + if (!disposing) + { + SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult); + _offlineRecognizerResult = IntPtr.Zero; + this._disposed = true; + base.Dispose(disposing); + } + } + } + public class OfflineRecognizer : OfflineBase + where T : class, new() + { + public OfflineRecognizer(T t, + string tokensFilePath, string 
decoding_method = "greedy_search", + int sample_rate = 16000, int feature_dim = 80, + int num_threads = 2, bool debug = false) + { + SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer(); + SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer(); + SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc(); + SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig(); + if (t is not null && t.GetType() == typeof(OfflineTransducer)) + { + OfflineTransducer? offlineTransducer = t as OfflineTransducer; +#pragma warning disable CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(offlineTransducer.DecoderFilename) + && File.Exists(offlineTransducer.EncoderFilename) + && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model"); +#pragma warning restore CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); + Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); + transducer.encoder_filename = offlineTransducer.EncoderFilename; + transducer.decoder_filename = offlineTransducer.DecoderFilename; + transducer.joiner_filename = offlineTransducer.JoinerFilename; + } + else if (t is not null && t.GetType() == typeof(OfflineParaformer)) + { + OfflineParaformer? offlineParaformer = t as OfflineParaformer; +#pragma warning disable CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model"); +#pragma warning restore CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); + Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); + paraformer.model = offlineParaformer.Model; + } + else if (t is not null && t.GetType() == typeof(OfflineNemoEncDecCtc)) + { + OfflineNemoEncDecCtc? 
offlineNemoEncDecCtc = t as OfflineNemoEncDecCtc; +#pragma warning disable CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model"); +#pragma warning restore CS8602 // 解引用可能出现空引用。 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); + Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); + nemo_ctc.model = offlineNemoEncDecCtc.Model; + } + + model_config.transducer = transducer; + model_config.paraformer = paraformer; + model_config.nemo_ctc = nemo_ctc; + model_config.num_threads = num_threads; + model_config.debug = debug; + model_config.tokens = tokensFilePath; + + SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig(); + feat_config.sample_rate = sample_rate; + feat_config.feature_dim = feature_dim; + + SherpaOnnxOfflineRecognizerConfig sherpaOnnxOfflineRecognizerConfig; + sherpaOnnxOfflineRecognizerConfig.decoding_method = decoding_method; + sherpaOnnxOfflineRecognizerConfig.feat_config = feat_config; + sherpaOnnxOfflineRecognizerConfig.model_config = model_config; + + _offlineRecognizer = + SherpaOnnxSharp.CreateOfflineRecognizer(sherpaOnnxOfflineRecognizerConfig); + } + internal OfflineStream CreateOfflineStream() + { + SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer); + return new OfflineStream(stream); + } + public OfflineStream[] CreateOfflineStream(List samplesList) + { + int batch_size = samplesList.Count; + OfflineStream[] streams = new OfflineStream[batch_size]; + List wavFiles = new List(); + for (int i = 0; i < batch_size; i++) + { + OfflineStream stream = CreateOfflineStream(); + AcceptWaveform(stream._offlineStream, 16000, samplesList[i]); + streams[i] = stream; + } + return streams; + } + internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples) + { + SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length); + } + internal IntPtr 
GetStreamsIntPtr(OfflineStream[] streams) + { + int streams_len = streams.Length; + int size = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream)); + IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len); + unsafe + { + byte* ptrbds = (byte*)(streamsIntPtr.ToPointer()); + for (int i = 0; i < streams_len; i++, ptrbds += (size)) + { + IntPtr streamIntptr = new IntPtr(ptrbds); + Marshal.StructureToPtr(streams[i]._offlineStream, streamIntptr, false); + } + } + return streamsIntPtr; + } + public void DecodeMultipleOfflineStreams(OfflineStream[] streams) + { + IntPtr streamsIntPtr = GetStreamsIntPtr(streams); + SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length); + Marshal.FreeHGlobal(streamsIntPtr); + } + internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream) + { + IntPtr result_ip = SherpaOnnxSharp.GetOfflineStreamResult(stream); + OfflineRecognizerResult offlineRecognizerResult = new OfflineRecognizerResult(result_ip); +#pragma warning disable CS8605 // 取消装箱可能为 null 的值。 + SherpaOnnxOfflineRecognizerResult result = + (SherpaOnnxOfflineRecognizerResult)Marshal.PtrToStructure( + offlineRecognizerResult._offlineRecognizerResult, typeof(SherpaOnnxOfflineRecognizerResult)); +#pragma warning restore CS8605 // 取消装箱可能为 null 的值。 + +#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 + string text = Marshal.PtrToStringAnsi(result.text); +#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 + OfflineRecognizerResultEntity offlineRecognizerResultEntity = + new OfflineRecognizerResultEntity(); + offlineRecognizerResultEntity.text = text; + offlineRecognizerResultEntity.text_len = result.text_len; + + return offlineRecognizerResultEntity; + } + public List GetResults(OfflineStream[] streams) + { + List results = new List(); + foreach (OfflineStream stream in streams) + { + OfflineRecognizerResultEntity offlineRecognizerResultEntity = GetResult(stream._offlineStream); + 
results.Add(offlineRecognizerResultEntity); + } + return results; + } + protected override void Dispose(bool disposing) + { + if (!disposing) + { + SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer); + _offlineRecognizer.impl = IntPtr.Zero; + this._disposed = true; + base.Dispose(); + } + } + } + internal static partial class SherpaOnnxSharp + { + private const string dllName = @"SherpaOnnxSharp"; + + [DllImport(dllName, EntryPoint = "CreateOfflineRecognizer", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config); + + [DllImport(dllName, EntryPoint = "CreateOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer); + + [DllImport(dllName, EntryPoint = "AcceptWaveform", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size); + + [DllImport(dllName, EntryPoint = "DecodeOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream); + + [DllImport(dllName, EntryPoint = "DecodeMultipleOfflineStreams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr + streams, int n); + + [DllImport(dllName, EntryPoint = "GetOfflineStreamResult", CallingConvention = CallingConvention.Cdecl)] + internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream); + + [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] + 
internal static extern void DestroyOfflineRecognizerResult(IntPtr result); + + [DllImport(dllName, EntryPoint = "DestroyOfflineStream", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream); + + [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizer", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer); + + [DllImport(dllName, EntryPoint = "CreateOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] + internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config); + + /// Free a pointer returned by CreateOnlineRecognizer() + /// + /// @param p A pointer returned by CreateOnlineRecognizer() + [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer); + + /// Create an online stream for accepting wave samples. + /// + /// @param recognizer A pointer returned by CreateOnlineRecognizer() + /// @return Return a pointer to an OnlineStream. The user has to invoke + /// DestroyOnlineStream() to free it to avoid memory leak. + [DllImport(dllName, EntryPoint = "CreateOnlineStream", CallingConvention = CallingConvention.Cdecl)] + internal static extern SherpaOnnxOnlineStream CreateOnlineStream( + SherpaOnnxOnlineRecognizer recognizer); + + /// Destroy an online stream. + /// + /// @param stream A pointer returned by CreateOnlineStream() + [DllImport(dllName, EntryPoint = "DestroyOnlineStream", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream); + + /// Accept input audio samples and compute the features. + /// The user has to invoke DecodeOnlineStream() to run the neural network and + /// decoding. 
+ /// + /// @param stream A pointer returned by CreateOnlineStream(). + /// @param sample_rate Sample rate of the input samples. If it is different + /// from config.feat_config.sample_rate, we will do + /// resampling inside sherpa-onnx. + /// @param samples A pointer to a 1-D array containing audio samples. + /// The range of samples has to be normalized to [-1, 1]. + /// @param n Number of elements in the samples array. + [DllImport(dllName, EntryPoint = "AcceptOnlineWaveform", CallingConvention = CallingConvention.Cdecl)] + internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate, + float[] samples, int n); + + /// Return 1 if there are enough number of feature frames for decoding. + /// Return 0 otherwise. + /// + /// @param recognizer A pointer returned by CreateOnlineRecognizer + /// @param stream A pointer returned by CreateOnlineStream + [DllImport(dllName, EntryPoint = "IsOnlineStreamReady", CallingConvention = CallingConvention.Cdecl)] + internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer, + SherpaOnnxOnlineStream stream); + + /// Call this function to run the neural network model and decoding. + // + /// Precondition for this function: IsOnlineStreamReady() MUST return 1. + /// + /// Usage example: + /// + /// while (IsOnlineStreamReady(recognizer, stream)) { + /// DecodeOnlineStream(recognizer, stream); + /// } + /// + [DllImport(dllName, EntryPoint = "DecodeOnlineStream", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DecodeOnlineStream(SherpaOnnxOnlineRecognizer recognizer, + SherpaOnnxOnlineStream stream); + + /// This function is similar to DecodeOnlineStream(). It decodes multiple + /// OnlineStream in parallel. + /// + /// Caution: The caller has to ensure each OnlineStream is ready, i.e., + /// IsOnlineStreamReady() for that stream should return 1. 
+ /// + /// @param recognizer A pointer returned by CreateOnlineRecognizer() + /// @param streams A pointer array containing pointers returned by + /// CreateOnlineRecognizer() + /// @param n Number of elements in the given streams array. + [DllImport(dllName, EntryPoint = "DecodeMultipleOnlineStreams", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer recognizer, + IntPtr streams, int n); + + /// Get the decoding results so far for an OnlineStream. + /// + /// @param recognizer A pointer returned by CreateOnlineRecognizer(). + /// @param stream A pointer returned by CreateOnlineStream(). + /// @return A pointer containing the result. The user has to invoke + /// DestroyOnlineRecognizerResult() to free the returned pointer to + /// avoid memory leak. + [DllImport(dllName, EntryPoint = "GetOnlineStreamResult", CallingConvention = CallingConvention.Cdecl)] + internal static extern IntPtr GetOnlineStreamResult( + SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream); + + /// Destroy the pointer returned by GetOnlineStreamResult(). + /// + /// @param r A pointer returned by GetOnlineStreamResult() + [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] + internal static extern void DestroyOnlineRecognizerResult(IntPtr result); + + /// Reset an OnlineStream , which clears the neural network model state + /// and the state for decoding. + /// + /// @param recognizer A pointer returned by CreateOnlineRecognizer(). + /// @param stream A pointer returned by CreateOnlineStream + [DllImport(dllName, EntryPoint = "Reset", CallingConvention = CallingConvention.Cdecl)] + internal static extern void Reset(SherpaOnnxOnlineRecognizer recognizer, + SherpaOnnxOnlineStream stream); + + /// Signal that no more audio samples would be available. + /// After this call, you cannot call AcceptWaveform() any more. 
+ /// + /// @param stream A pointer returned by CreateOnlineStream() + [DllImport(dllName, EntryPoint = "InputFinished", CallingConvention = CallingConvention.Cdecl)] + internal static extern void InputFinished(SherpaOnnxOnlineStream stream); + + /// Return 1 if an endpoint has been detected. + /// + /// @param recognizer A pointer returned by CreateOnlineRecognizer() + /// @param stream A pointer returned by CreateOnlineStream() + /// @return Return 1 if an endpoint is detected. Return 0 otherwise. + [DllImport(dllName, EntryPoint = "IsEndpoint", CallingConvention = CallingConvention.Cdecl)] + internal static extern int IsEndpoint(SherpaOnnxOnlineRecognizer recognizer, + SherpaOnnxOnlineStream stream); + } + internal struct SherpaOnnxOfflineTransducer + { + public string encoder_filename; + public string decoder_filename; + public string joiner_filename; + public SherpaOnnxOfflineTransducer() + { + encoder_filename = ""; + decoder_filename = ""; + joiner_filename = ""; + } + }; + internal struct SherpaOnnxOfflineParaformer + { + public string model; + public SherpaOnnxOfflineParaformer() + { + model = ""; + } + }; + internal struct SherpaOnnxOfflineNemoEncDecCtc + { + public string model; + public SherpaOnnxOfflineNemoEncDecCtc() + { + model = ""; + } + }; + internal struct SherpaOnnxOfflineModelConfig + { + public SherpaOnnxOfflineTransducer transducer; + public SherpaOnnxOfflineParaformer paraformer; + public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; + public string tokens; + public int num_threads; + public bool debug; + }; + /// It expects 16 kHz 16-bit single channel wave format. + internal struct SherpaOnnxFeatureConfig + { + /// Sample rate of the input data. MUST match the one expected + /// by the model. For instance, it should be 16000 for models provided + /// by us. + public int sample_rate; + + /// Feature dimension of the model. + /// For instance, it should be 80 for models provided by us. 
+ public int feature_dim; + }; + internal struct SherpaOnnxOfflineRecognizerConfig + { + public SherpaOnnxFeatureConfig feat_config; + public SherpaOnnxOfflineModelConfig model_config; + + /// Possible values are: greedy_search, modified_beam_search + public string decoding_method; + + }; + internal struct SherpaOnnxOfflineRecognizer + { + public IntPtr impl; + }; + [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] + internal struct SherpaOnnxOfflineStream + { + public IntPtr impl; + }; + internal struct SherpaOnnxOfflineRecognizerResult + { + public IntPtr text; + public int text_len; + } + internal struct SherpaOnnxOnlineTransducer + { + public string encoder_filename; + public string decoder_filename; + public string joiner_filename; + public SherpaOnnxOnlineTransducer() + { + encoder_filename = string.Empty; + decoder_filename = string.Empty; + joiner_filename = string.Empty; + } + }; + internal struct SherpaOnnxOnlineModelConfig + { + public SherpaOnnxOnlineTransducer transducer; + public string tokens; + public int num_threads; + public bool debug; // true to print debug information of the model + }; + internal struct SherpaOnnxOnlineRecognizerConfig + { + public SherpaOnnxFeatureConfig feat_config; + public SherpaOnnxOnlineModelConfig model_config; + + /// Possible values are: greedy_search, modified_beam_search + public string decoding_method; + + /// Used only when decoding_method is modified_beam_search + /// Example value: 4 + public int max_active_paths; + + /// 0 to disable endpoint detection. + /// A non-zero value to enable endpoint detection. + public int enable_endpoint; + + /// An endpoint is detected if trailing silence in seconds is larger than + /// this value even if nothing has been decoded. + /// Used only when enable_endpoint is not 0. 
+ public float rule1_min_trailing_silence; + + /// An endpoint is detected if trailing silence in seconds is larger than + /// this value after something that is not blank has been decoded. + /// Used only when enable_endpoint is not 0. + public float rule2_min_trailing_silence; + + /// An endpoint is detected if the utterance in seconds is larger than + /// this value. + /// Used only when enable_endpoint is not 0. + public float rule3_min_utterance_length; + }; + internal struct SherpaOnnxOnlineRecognizerResult + { + public IntPtr text; + public int text_len; + // TODO: Add more fields + } + internal struct SherpaOnnxOnlineRecognizer + { + public IntPtr impl; + }; + [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] + internal struct SherpaOnnxOnlineStream + { + public IntPtr impl; + }; + public class OfflineNemoEncDecCtc + { + private string model = string.Empty; + public string Model { get => model; set => model = value; } + } + public class OfflineParaformer + { + private string model = string.Empty; + public string Model { get => model; set => model = value; } + } + public class OfflineRecognizerResultEntity + { + /// + /// recognizer result + /// + public string? text { get; set; } + /// + /// recognizer result length + /// + public int text_len { get; set; } + /// + /// decode tokens + /// + public List? tokens { get; set; } + /// + /// timestamps + /// + public List? timestamps { get; set; } + } + public class OfflineTransducer + { + private string encoderFilename = string.Empty; + private string decoderFilename = string.Empty; + private string joinerFilename = string.Empty; + public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; } + public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; } + public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; } + } + public class OnlineEndpoint + { + /// 0 to disable endpoint detection. 
+ /// A non-zero value to enable endpoint detection. + private int enableEndpoint; + + /// An endpoint is detected if trailing silence in seconds is larger than + /// this value even if nothing has been decoded. + /// Used only when enable_endpoint is not 0. + private float rule1MinTrailingSilence; + + /// An endpoint is detected if trailing silence in seconds is larger than + /// this value after something that is not blank has been decoded. + /// Used only when enable_endpoint is not 0. + private float rule2MinTrailingSilence; + + /// An endpoint is detected if the utterance in seconds is larger than + /// this value. + /// Used only when enable_endpoint is not 0. + private float rule3MinUtteranceLength; + + public int EnableEndpoint { get => enableEndpoint; set => enableEndpoint = value; } + public float Rule1MinTrailingSilence { get => rule1MinTrailingSilence; set => rule1MinTrailingSilence = value; } + public float Rule2MinTrailingSilence { get => rule2MinTrailingSilence; set => rule2MinTrailingSilence = value; } + public float Rule3MinUtteranceLength { get => rule3MinUtteranceLength; set => rule3MinUtteranceLength = value; } + } + public class OnlineRecognizerResultEntity + { + /// + /// recognizer result + /// + public string? text { get; set; } + /// + /// recognizer result length + /// + public int text_len { get; set; } + /// + /// decode tokens + /// + public List? tokens { get; set; } + /// + /// timestamps + /// + public List? 
timestamps { get; set; }
+    }
+    public class OnlineTransducer
+    {
+        private string encoderFilename = string.Empty;
+        private string decoderFilename = string.Empty;
+        private string joinerFilename = string.Empty;
+        public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; }
+        public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; }
+        public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; }
+    }
+}
\ No newline at end of file
diff --git a/sherpa-onnx/csharp-api/SherpaOnnx.csproj b/sherpa-onnx/csharp-api/SherpaOnnx.csproj
new file mode 100644
index 00000000..3f2790a2
--- /dev/null
+++ b/sherpa-onnx/csharp-api/SherpaOnnx.csproj
@@ -0,0 +1,10 @@
+
+
+
+    net6.0
+    enable
+    enable
+    true
+
+
+
diff --git a/sherpa-onnx/csharp-api/offline-api.cpp b/sherpa-onnx/csharp-api/offline-api.cpp
new file mode 100644
index 00000000..6908a2a8
--- /dev/null
+++ b/sherpa-onnx/csharp-api/offline-api.cpp
@@ -0,0 +1,204 @@
+// sherpa-onnx/csharp-api/offline-api.cpp
+//
+// Copyright (c) 2023 Manyeyes Corporation
+//
+// stdcall wrappers around sherpa_onnx::OfflineRecognizer and
+// sherpa_onnx::OfflineStream; the declarations live in offline-api.h.
+
+#include "offline-api.h"
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "sherpa-onnx/csrc/display.h"
+#include "sherpa-onnx/csrc/offline-recognizer.h"
+
+namespace sherpa_onnx
+{
+    /// Handle behind SherpaOnnxOfflineRecognizer*; owns the recognizer.
+    struct SherpaOnnxOfflineRecognizer {
+        std::unique_ptr<sherpa_onnx::OfflineRecognizer> impl;
+    };
+
+    /// Handle behind SherpaOnnxOfflineStream*; owns the stream.
+    struct SherpaOnnxOfflineStream {
+        std::unique_ptr<sherpa_onnx::OfflineStream> impl;
+        explicit SherpaOnnxOfflineStream(
+            std::unique_ptr<sherpa_onnx::OfflineStream> p)
+            : impl(std::move(p)) {}
+    };
+
+    /// Not referenced by any function in this file.
+    /// NOTE(review): element type assumed to be sherpa_onnx::Display from
+    /// display.h -- confirm.
+    struct SherpaOnnxDisplay {
+        std::unique_ptr<sherpa_onnx::Display> impl;
+    };
+
+    namespace
+    {
+        /// True when s points to a non-empty C string.
+        bool HasValue(const char* s) { return s != nullptr && s[0] != '\0'; }
+
+        /// Maps a null C string to "" so it can be assigned to a std::string.
+        const char* OrEmpty(const char* s) { return s != nullptr ? s : ""; }
+    }  // namespace
+
+    /// Create an offline recognizer from a C-style configuration.
+    ///
+    /// Only one model family is configured, picked in this order: transducer
+    /// (non-empty encoder filename), then paraformer, then NeMo CTC.
+    ///
+    /// @param config Recognizer configuration; null string fields are
+    ///               tolerated (see the comments in the body).
+    /// @return A pointer to be freed with DestroyOfflineRecognizer(), or
+    ///         nullptr when config is null.
+    SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(
+        const SherpaOnnxOfflineRecognizerConfig* config) {
+        if (config == nullptr) {
+            return nullptr;
+        }
+
+        const SherpaOnnxOfflineModelConfig& model = config->model_config;
+        sherpa_onnx::OfflineRecognizerConfig recognizer_config;
+
+        recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
+        recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
+
+        if (HasValue(model.transducer.encoder_filename)) {
+            recognizer_config.model_config.transducer.encoder_filename =
+                model.transducer.encoder_filename;
+            recognizer_config.model_config.transducer.decoder_filename =
+                OrEmpty(model.transducer.decoder_filename);
+            recognizer_config.model_config.transducer.joiner_filename =
+                OrEmpty(model.transducer.joiner_filename);
+        }
+        else if (HasValue(model.paraformer.model)) {
+            recognizer_config.model_config.paraformer.model =
+                model.paraformer.model;
+        }
+        else if (HasValue(model.nemo_ctc.model)) {
+            recognizer_config.model_config.nemo_ctc.model =
+                model.nemo_ctc.model;
+        }
+
+        recognizer_config.model_config.tokens = OrEmpty(model.tokens);
+        recognizer_config.model_config.num_threads = model.num_threads;
+        recognizer_config.model_config.debug = model.debug;
+
+        // A null decoding_method keeps whatever value OfflineRecognizerConfig
+        // was default-constructed with.
+        if (config->decoding_method != nullptr) {
+            recognizer_config.decoding_method = config->decoding_method;
+        }
+
+        // Keep the wrapper in a unique_ptr until the recognizer is built so it
+        // is not leaked if the OfflineRecognizer constructor throws.
+        std::unique_ptr<SherpaOnnxOfflineRecognizer> recognizer(
+            new SherpaOnnxOfflineRecognizer);
+        recognizer->impl.reset(new sherpa_onnx::OfflineRecognizer(recognizer_config));
+        return recognizer.release();
+    }
+
+    /// Create a stream that can be fed audio and decoded by the recognizer.
+    ///
+    /// @param recognizer A pointer returned by CreateOfflineRecognizer()
+    /// @return A pointer to be freed with DestroyOfflineStream()
+    SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(
+        SherpaOnnxOfflineRecognizer* recognizer) {
+        return new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
+    }
+
+    /// Pass audio samples to the stream.
+    ///
+    /// @param stream A pointer returned by CreateOfflineStream()
+    /// @param sample_rate Sample rate of the samples
+    /// @param samples Pointer to samples_size float samples
+    /// @param samples_size Number of entries in samples
+    void __stdcall AcceptWaveform(
+        SherpaOnnxOfflineStream* stream,
+        int32_t sample_rate,
+        const float* samples, int32_t samples_size) {
+        // Forward the caller's buffer directly; no intermediate copy is needed.
+        stream->impl->AcceptWaveform(sample_rate, samples, samples_size);
+    }
+
+    /// Decode a single stream.
+    ///
+    /// @param recognizer A pointer returned by CreateOfflineRecognizer()
+    /// @param stream A pointer returned by CreateOfflineStream()
+    void __stdcall DecodeOfflineStream(
+        SherpaOnnxOfflineRecognizer* recognizer,
+        SherpaOnnxOfflineStream* stream) {
+        recognizer->impl->DecodeStream(stream->impl.get());
+    }
+
+    /// Decode several streams with one call.
+    ///
+    /// @param recognizer A pointer returned by CreateOfflineRecognizer()
+    /// @param streams Array of n pointers returned by CreateOfflineStream()
+    /// @param n Number of entries in streams; nothing is done when n <= 0
+    void __stdcall DecodeMultipleOfflineStreams(
+        SherpaOnnxOfflineRecognizer* recognizer,
+        SherpaOnnxOfflineStream** streams, int32_t n) {
+        if (n <= 0) {
+            return;
+        }
+
+        std::vector<sherpa_onnx::OfflineStream*> ss(n);
+        for (int32_t i = 0; i != n; ++i) {
+            ss[i] = streams[i]->impl.get();
+        }
+        recognizer->impl->DecodeStreams(ss.data(), n);
+    }
+
+    /// Copy the recognition result of a stream into a newly allocated struct.
+    ///
+    /// @param stream A pointer returned by CreateOfflineStream()
+    /// @return A pointer to be freed with DestroyOfflineRecognizerResult();
+    ///         text is NUL-terminated and text_len excludes the terminator.
+    SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(
+        SherpaOnnxOfflineStream* stream) {
+        const sherpa_onnx::OfflineRecognitionResult& result =
+            stream->impl->GetResult();
+        const auto& text = result.text;
+        auto r = new SherpaOnnxOfflineRecognizerResult;
+        r->text = new char[text.size() + 1];
+        std::copy(text.begin(), text.end(), r->text);
+        r->text[text.size()] = '\0';
+        r->text_len = static_cast<int>(text.size());
+        return r;
+    }
+
+    /// Free a pointer returned by CreateOfflineRecognizer()
+    ///
+    /// @param recognizer A pointer returned by CreateOfflineRecognizer();
+    ///                   null is ignored
+    void __stdcall DestroyOfflineRecognizer(
+        SherpaOnnxOfflineRecognizer* recognizer) {
+        // impl is a unique_ptr, so deleting the wrapper frees the recognizer.
+        delete recognizer;
+    }
+
+    /// Destroy an offline stream.
+    ///
+    /// @param stream A pointer returned by CreateOfflineStream()
+    void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) {
+        delete stream;
+    }
+
+    /// Destroy the pointer returned by GetOfflineStreamResult().
+    ///
+    /// @param r A pointer returned by GetOfflineStreamResult(); null is ignored
+    void __stdcall DestroyOfflineRecognizerResult(
+        SherpaOnnxOfflineRecognizerResult* r) {
+        if (r == nullptr) {
+            return;
+        }
+        // text was allocated with new[], so it must be released with delete[].
+        delete[] r->text;
+        delete r;
+    }
+}// namespace sherpa_onnx
\ No newline at end of file
diff --git a/sherpa-onnx/csharp-api/offline-api.h b/sherpa-onnx/csharp-api/offline-api.h
new file mode 100644
index 00000000..03c6011d
--- /dev/null
+++ b/sherpa-onnx/csharp-api/offline-api.h
@@ -0,0 +1,122 @@
+// sherpa-onnx/csharp-api/offline-api.h
+//
+// Copyright (c) 2023 Manyeyes Corporation
+
+#pragma once
+
+#include <stdint.h>
+
+namespace sherpa_onnx
+{
+    /// Please refer to
+    /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+    /// to download pre-trained models. That is, you can find encoder-xxx.onnx
+    /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
+    /// from there.
+    typedef struct SherpaOnnxOfflineTransducer {
+        const char* encoder_filename;  // path to the encoder model
+        const char* decoder_filename;  // path to the decoder model
+        const char* joiner_filename;   // path to the joiner model
+    } SherpaOnnxOfflineTransducer;
+
+    typedef struct SherpaOnnxOfflineParaformer {
+        const char* model;  // path to the paraformer model
+    } SherpaOnnxOfflineParaformer;
+
+    typedef struct SherpaOnnxOfflineNemoEncDecCtc {
+        const char* model;  // path to the NeMo CTC model
+    } SherpaOnnxOfflineNemoEncDecCtc;
+
+    /// The first non-empty of transducer, paraformer, nemo_ctc selects the model.
+    typedef struct SherpaOnnxOfflineModelConfig {
+        SherpaOnnxOfflineTransducer transducer;
+        SherpaOnnxOfflineParaformer paraformer;
+        SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;
+        const char* tokens;  // path to tokens.txt
+        int32_t num_threads;
+        bool debug;  // NOTE(review): 1-byte C++ bool; confirm the C# struct marshals it as such
+    } SherpaOnnxOfflineModelConfig;
+
+    /// It expects 16 kHz 16-bit single channel wave format.
+    typedef struct SherpaOnnxFeatureConfig {
+        /// Sample rate of the input data. MUST match the one expected
+        /// by the model. For instance, it should be 16000 for models provided
+        /// by us.
+        int32_t sample_rate;
+
+        /// Feature dimension of the model.
+        /// For instance, it should be 80 for models provided by us.
+        int32_t feature_dim;
+    } SherpaOnnxFeatureConfig;
+
+    typedef struct SherpaOnnxOfflineRecognizerConfig {
+        SherpaOnnxFeatureConfig feat_config;
+        SherpaOnnxOfflineModelConfig model_config;
+
+        /// Possible values are: greedy_search, modified_beam_search
+        const char* decoding_method;
+
+    } SherpaOnnxOfflineRecognizerConfig;
+
+    typedef struct SherpaOnnxOfflineRecognizerResult {
+        // Recognition results.
+        // For English, it consists of space separated words.
+        // For Chinese, it consists of Chinese words without spaces.
+        char* text;    // NUL-terminated
+        int text_len;  // length of text, not counting the terminating NUL
+
+        // Decoded results at the token level.
+        // For instance, for BPE-based models it consists of a list of BPE tokens.
+        // std::vector tokens;
+
+        // timestamps.size() == tokens.size()
+        // timestamps[i] records the time in seconds when tokens[i] is decoded.
+        // std::vector timestamps;
+    } SherpaOnnxOfflineRecognizerResult;
+
+    /// Note: OfflineRecognizer here means a non-streaming recognizer.
+    /// It does not need to access the Internet during recognition.
+    /// Everything is run locally.
+    typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;
+
+    typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
+
+    extern "C" __declspec(dllexport)  // free the result with DestroyOfflineRecognizer()
+    SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(
+        const SherpaOnnxOfflineRecognizerConfig* config);
+
+    extern "C" __declspec(dllexport)  // free the result with DestroyOfflineStream()
+    SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(
+        SherpaOnnxOfflineRecognizer* recognizer);
+
+    extern "C" __declspec(dllexport)  // feed samples_size float samples to the stream
+    void __stdcall AcceptWaveform(
+        SherpaOnnxOfflineStream* stream, int32_t sample_rate,
+        const float* samples, int32_t samples_size);
+
+    extern "C" __declspec(dllexport)  // decode one stream
+    void __stdcall DecodeOfflineStream(
+        SherpaOnnxOfflineRecognizer* recognizer,
+        SherpaOnnxOfflineStream* stream);
+
+    extern "C" __declspec(dllexport)  // decode the n streams in the array
+    void __stdcall DecodeMultipleOfflineStreams(
+        SherpaOnnxOfflineRecognizer* recognizer,
+        SherpaOnnxOfflineStream** streams, int32_t n);
+
+    extern "C" __declspec(dllexport)  // free the result with DestroyOfflineRecognizerResult()
+    SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(
+        SherpaOnnxOfflineStream* stream);
+
+    extern "C" __declspec(dllexport)  // release a pointer from CreateOfflineRecognizer()
+    void __stdcall DestroyOfflineRecognizer(
+        SherpaOnnxOfflineRecognizer* recognizer);
+
+    extern "C" __declspec(dllexport)  // release a pointer from CreateOfflineStream()
+    void __stdcall DestroyOfflineStream(
+        SherpaOnnxOfflineStream* stream);
+
+    extern "C" __declspec(dllexport)  // release a pointer from GetOfflineStreamResult()
+    void __stdcall DestroyOfflineRecognizerResult(
+        SherpaOnnxOfflineRecognizerResult* r);
+}// namespace sherpa_onnx
\ No newline at end of file