// See https://aka.ms/new-console-template for more information
// Copyright (c)  2023 by manyeyes
using SherpaOnnx;

// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// to download pre-trained models. That is, you can find encoder-xxx.onnx,
// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this program
// from there.
//
// Download a model, e.g.:
//   (the directory where the application runs)
//   [/path/to] = System.AppDomain.CurrentDomain.BaseDirectory
//   cd /path/to
//   git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
//
// NuGet packages for sherpa-onnx:
//   PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
//   PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx

// transducer Usage:
/*
 .\SherpaOnnx.Examples.exe `
  --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
  --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
  --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
  --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
  --num-threads=2 `
  --decoding-method=modified_beam_search `
  --debug=false `
  ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
 */

/// <summary>
/// Console example that feeds wave files chunk by chunk into a streaming
/// (online) transducer recognizer and prints the intermediate results
/// together with the measured real time factor.
/// </summary>
internal class OnlineDecodeFile
{
    /// <summary>Help text printed when arguments are missing or incomplete.</summary>
    private const string Usage = @"
-----------------------------
transducer Usage:
  --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
  --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
  --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
  --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
  --num-threads=2 `
  --decoding-method=modified_beam_search `
  --debug=false `
  ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
-----------------------------
";

    /// <summary>Characters stripped from option names and positional arguments.</summary>
    private static readonly char[] s_nameTrimChars = { '-', '`', ' ', '\t', '\r', '\n' };

    /// <summary>
    /// Characters stripped from option values. '-' is deliberately absent so
    /// that values such as "-1" keep their sign.
    /// </summary>
    private static readonly char[] s_valueTrimChars = { '`', ' ', '\t', '\r', '\n' };

    /// <summary>
    /// Entry point. Parses the options, builds the online recognizer and
    /// decodes every wave file given as a positional argument.
    /// </summary>
    /// <param name="args">
    /// Command line arguments. When empty, the arguments are read
    /// interactively from standard input, one per line.
    /// </param>
    static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            System.Console.WriteLine("Please enter the correct parameters:");
            System.Console.WriteLine(Usage);
            args = ReadArgsFromConsole();
        }

        Console.WriteLine("Started!\n");

        string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
        List<string> wavFiles = new List<string>();
        Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

        // Raw option value, or "" when the option was not supplied.
        string ValueOf(string key) =>
            argsDict.TryGetValue(key, out var value) ? value : "";

        // Option value resolved against the application directory, or "" when absent.
        string PathOf(string key) =>
            argsDict.TryGetValue(key, out var value) ? Path.Combine(applicationBase, value) : "";

        string decoder = PathOf("decoder");
        string encoder = PathOf("encoder");
        string joiner = PathOf("joiner");
        string paraformer = PathOf("paraformer");
        string nemoCtc = PathOf("nemo_ctc");
        string tokens = PathOf("tokens");
        string numThreadsText = ValueOf("num_threads");
        string decodingMethod = ValueOf("decoding_method");

        // TryParse leaves 0 / false behind when the option is missing or malformed.
        int.TryParse(numThreadsText, out int numThreads);
        bool.TryParse(ValueOf("debug"), out bool isDebug);

        bool hasTransducer = !string.IsNullOrEmpty(encoder)
            && !string.IsNullOrEmpty(decoder)
            && !string.IsNullOrEmpty(joiner);

        if (!hasTransducer && string.IsNullOrEmpty(paraformer) && string.IsNullOrEmpty(nemoCtc))
        {
            Console.WriteLine("Please specify at least one model");
            Console.WriteLine(Usage);
            // Nothing can be decoded without a model, so stop instead of
            // printing meaningless statistics.
            return;
        }

        // batch decode
        TimeSpan totalDuration = TimeSpan.Zero;
        // Accumulates decoding time over all files; audio loading is excluded.
        var decodeTimer = new System.Diagnostics.Stopwatch();

        if (hasTransducer)
        {
            OnlineTransducer onlineTransducer = new OnlineTransducer();
            onlineTransducer.EncoderFilename = encoder;
            onlineTransducer.DecoderFilename = decoder;
            onlineTransducer.JoinerFilename = joiner;

            OnlineRecognizer onlineRecognizer = new OnlineRecognizer(
                onlineTransducer,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);

            foreach (string wavFile in wavFiles)
            {
                TimeSpan duration = TimeSpan.Zero;
                var samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration);
                OnlineStream stream = onlineRecognizer.CreateStream();

                decodeTimer.Start();
                foreach (var chunk in samplesList)
                {
                    onlineRecognizer.AcceptWaveForm(stream, 16000, chunk);
                    onlineRecognizer.DecodeStream(stream);
                    OnlineRecognizerResultEntity partialResult = onlineRecognizer.GetResult(stream);
                    Console.WriteLine(partialResult.text);
                }
                decodeTimer.Stop();

                totalDuration += duration;
            }
        }

        double elapsedMilliseconds = decodeTimer.Elapsed.TotalMilliseconds;
        // Evaluates to NaN when no audio was decoded (0 / 0).
        double rtf = elapsedMilliseconds / totalDuration.TotalMilliseconds;

        Console.WriteLine("num_threads:{0}", numThreadsText);
        Console.WriteLine("decoding_method:{0}", decodingMethod);
        Console.WriteLine("elapsed_milliseconds:{0}", elapsedMilliseconds.ToString());
        Console.WriteLine("wave total_duration_milliseconds:{0}", totalDuration.TotalMilliseconds.ToString());
        Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());
        Console.WriteLine("End!");
    }

    /// <summary>
    /// Reads arguments from standard input, one per line, until an empty
    /// line or end of input is reached.
    /// </summary>
    /// <returns>The lines entered, in order, without the terminating empty line.</returns>
    private static string[] ReadArgsFromConsole()
    {
        List<string> lines = new List<string>();
        string? line;
        // ReadLine only: Console.ReadKey would consume the first character of
        // the next line and throws when standard input is redirected.
        while (!string.IsNullOrWhiteSpace(line = Console.ReadLine()))
        {
            lines.Add(line);
        }

        return lines.ToArray();
    }

    /// <summary>
    /// Splits raw arguments into "name=value" options and positional wave
    /// file paths.
    /// </summary>
    /// <param name="args">Raw arguments, e.g. "--num-threads=2 `" or "./a.wav".</param>
    /// <param name="applicationBase">Directory that positional paths are combined with.</param>
    /// <param name="wavFiles">Receives the resolved path of every positional argument.</param>
    /// <returns>
    /// Option names (leading dashes removed, inner '-' replaced by '_') mapped
    /// to their values. When an option is repeated the last value wins.
    /// </returns>
    static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
    {
        Dictionary<string, string> argsDict = new Dictionary<string, string>();
        foreach (string input in args)
        {
            if (string.IsNullOrEmpty(input))
            {
                continue;
            }

            // Split on the first '=' only so values may themselves contain '='.
            string[] parts = input.Split('=', 2);
            if (parts.Length == 1)
            {
                string path = parts[0].Trim(s_nameTrimChars);
                // Skip lines that held only decoration such as a lone backtick.
                if (path.Length > 0)
                {
                    wavFiles.Add(Path.Combine(applicationBase, path));
                }
            }
            else
            {
                string key = parts[0].Trim(s_nameTrimChars).Replace("-", "_");
                if (key.Length > 0)
                {
                    // Indexer instead of Add: a repeated option must not throw.
                    argsDict[key] = parts[1].Trim(s_valueTrimChars);
                }
            }
        }

        return argsDict;
    }
}