diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index 75d7cc88..7f523cd2 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -63,8 +63,10 @@ jobs: shell: bash run: | cd dotnet-examples/ + cd online-decode-files - ./run.sh + ./run-transducer.sh + ./run-paraformer.sh cd ../offline-decode-files ./run-nemo-ctc.sh diff --git a/.gitignore b/.gitignore index 55661c5b..d6f49076 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ sherpa-onnx-zipformer-en-2023-03-30 sherpa-onnx-zipformer-en-2023-04-01 run-offline-decode-files.sh sherpa-onnx-nemo-ctc-en-citrinet-512 +sherpa-onnx-streaming-paraformer-bilingual-zh-en run-offline-decode-files-nemo-ctc.sh *.jar sherpa-onnx-nemo-ctc-* diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs index b5bb6fc5..72c996ca 100644 --- a/dotnet-examples/online-decode-files/Program.cs +++ b/dotnet-examples/online-decode-files/Program.cs @@ -23,15 +23,21 @@ class OnlineDecodeFiles [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] public string Provider { get; set; } - [Option(Required = true, HelpText = "Path to encoder.onnx")] + [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] public string Encoder { get; set; } - [Option(Required = true, HelpText = "Path to decoder.onnx")] + [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] public string Decoder { get; set; } - [Option(Required = true, HelpText = "Path to joiner.onnx")] + [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] public string Joiner { get; set; } + [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] + public string ParaformerEncoder { get; set; } + + [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] + public string ParaformerDecoder { get; set; } + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] public int NumThreads { get; set; } @@ -88,6 +94,8 @@ larger than this value. Used only when --enable-endpoint is true.")] private static void DisplayHelp(ParserResult result, IEnumerable errs) { string usage = @" +(1) Streaming transducer models + dotnet run \ --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \ @@ -99,8 +107,20 @@ dotnet run \ --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \ ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav +(2) Streaming Paraformer models +dotnet run \ + --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \ + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \ + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \ + --num-threads=2 \ + --decoding-method=greedy_search \ + --debug=false \ + --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \ + ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav + Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html to download pre-trained streaming models. "; @@ -123,13 +143,17 @@ to download pre-trained streaming models. // You can change it if your model has a different feature dim. config.FeatConfig.FeatureDim = 80; - config.TransducerModelConfig.Encoder = options.Encoder; - config.TransducerModelConfig.Decoder = options.Decoder; - config.TransducerModelConfig.Joiner = options.Joiner; - config.TransducerModelConfig.Tokens = options.Tokens; - config.TransducerModelConfig.Provider = options.Provider; - config.TransducerModelConfig.NumThreads = options.NumThreads; - config.TransducerModelConfig.Debug = options.Debug ? 1 : 0; + config.ModelConfig.Transducer.Encoder = options.Encoder; + config.ModelConfig.Transducer.Decoder = options.Decoder; + config.ModelConfig.Transducer.Joiner = options.Joiner; + + config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder; + config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder; + + config.ModelConfig.Tokens = options.Tokens; + config.ModelConfig.Provider = options.Provider; + config.ModelConfig.NumThreads = options.NumThreads; + config.ModelConfig.Debug = options.Debug ? 1 : 0; config.DecodingMethod = options.DecodingMethod; config.MaxActivePaths = options.MaxActivePaths; diff --git a/dotnet-examples/online-decode-files/run-paraformer.sh b/dotnet-examples/online-decode-files/run-paraformer.sh new file mode 100755 index 00000000..4200aee7 --- /dev/null +++ b/dotnet-examples/online-decode-files/run-paraformer.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english +# to download the model files + +if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en + cd sherpa-onnx-streaming-paraformer-bilingual-zh-en + git lfs pull --include "*.onnx" + cd .. +fi + +dotnet run -c Release \ + --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \ + --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \ + --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \ + --decoding-method greedy_search \ + --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav \ + ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav diff --git a/dotnet-examples/online-decode-files/run.sh b/dotnet-examples/online-decode-files/run-transducer.sh similarity index 100% rename from dotnet-examples/online-decode-files/run.sh rename to dotnet-examples/online-decode-files/run-transducer.sh diff --git a/dotnet-examples/speech-recognition-from-microphone/Program.cs b/dotnet-examples/speech-recognition-from-microphone/Program.cs index b399ea8d..586e3b16 100644 --- a/dotnet-examples/speech-recognition-from-microphone/Program.cs +++ b/dotnet-examples/speech-recognition-from-microphone/Program.cs @@ -26,15 +26,21 @@ class SpeechRecognitionFromMicrophone [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] public string Provider { get; set; } - [Option(Required = true, HelpText = "Path to encoder.onnx")] + [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] public string Encoder { get; set; } - [Option(Required = true, HelpText = "Path to decoder.onnx")] + [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] public string Decoder { get; set; } - [Option(Required = true, HelpText = "Path to joiner.onnx")] + [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] public string Joiner { get; set; } + [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] + public string ParaformerEncoder { get; set; } + + [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] + public string ParaformerDecoder { get; set; } + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] public int NumThreads { get; set; } @@ -87,14 +93,24 @@ larger than this value. Used only when --enable-endpoint is true.")] private static void DisplayHelp(ParserResult result, IEnumerable errs) { string usage = @" +(1) Streaming transducer models + dotnet run -c Release \ --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \ --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \ --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \ - --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \ + --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx + +(2) Streaming Paraformer models + +dotnet run \ + --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \ + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \ + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html to download pre-trained streaming models. "; @@ -117,13 +133,17 @@ to download pre-trained streaming models. // You can change it if your model has a different feature dim. config.FeatConfig.FeatureDim = 80; - config.TransducerModelConfig.Encoder = options.Encoder; - config.TransducerModelConfig.Decoder = options.Decoder; - config.TransducerModelConfig.Joiner = options.Joiner; - config.TransducerModelConfig.Tokens = options.Tokens; - config.TransducerModelConfig.Provider = options.Provider; - config.TransducerModelConfig.NumThreads = options.NumThreads; - config.TransducerModelConfig.Debug = options.Debug ? 1 : 0; + config.ModelConfig.Transducer.Encoder = options.Encoder; + config.ModelConfig.Transducer.Decoder = options.Decoder; + config.ModelConfig.Transducer.Joiner = options.Joiner; + + config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder; + config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder; + + config.ModelConfig.Tokens = options.Tokens; + config.ModelConfig.Provider = options.Provider; + config.ModelConfig.NumThreads = options.NumThreads; + config.ModelConfig.Debug = options.Debug ? 1 : 0; config.DecodingMethod = options.DecodingMethod; config.MaxActivePaths = options.MaxActivePaths; @@ -135,7 +155,6 @@ to download pre-trained streaming models. OnlineRecognizer recognizer = new OnlineRecognizer(config); - OnlineStream s = recognizer.CreateStream(); Console.WriteLine(PortAudio.VersionInfo.versionText); @@ -196,7 +215,6 @@ to download pre-trained streaming models. stream.Start(); - int segment_index = 0; String lastText = ""; int segmentIndex = 0; diff --git a/dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh b/dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh new file mode 100755 index 00000000..5e774a55 --- /dev/null +++ b/dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english +# to download the model files + +if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en + cd sherpa-onnx-streaming-paraformer-bilingual-zh-en + git lfs pull --include "*.onnx" + cd .. +fi + +dotnet run -c Release \ + --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \ + --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \ + --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \ diff --git a/dotnet-examples/speech-recognition-from-microphone/run.sh b/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh similarity index 100% rename from dotnet-examples/speech-recognition-from-microphone/run.sh rename to dotnet-examples/speech-recognition-from-microphone/run-transducer.sh diff --git a/scripts/dotnet/online.cs b/scripts/dotnet/online.cs index d423ee36..f0ca414b 100644 --- a/scripts/dotnet/online.cs +++ b/scripts/dotnet/online.cs @@ -22,12 +22,8 @@ namespace SherpaOnnx Encoder = ""; Decoder = ""; Joiner = ""; - Tokens = ""; - NumThreads = 1; - Provider = "cpu"; - Debug = 0; - ModelType = ""; } + [MarshalAs(UnmanagedType.LPStr)] public string Encoder; @@ -36,6 +32,40 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Joiner; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OnlineParaformerModelConfig + { + public OnlineParaformerModelConfig() + { + Encoder = ""; + Decoder = ""; + } + + [MarshalAs(UnmanagedType.LPStr)] + public string Encoder; + + [MarshalAs(UnmanagedType.LPStr)] + public string Decoder; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OnlineModelConfig + { + public OnlineModelConfig() + { + Transducer = new OnlineTransducerModelConfig(); + Paraformer = new OnlineParaformerModelConfig(); + Tokens = ""; + NumThreads = 1; + Provider = "cpu"; + Debug = 0; + ModelType = ""; + } + + public OnlineTransducerModelConfig Transducer; + public OnlineParaformerModelConfig Paraformer; [MarshalAs(UnmanagedType.LPStr)] public string Tokens; @@ -78,7 +108,7 @@ namespace SherpaOnnx public OnlineRecognizerConfig() { FeatConfig = new FeatureConfig(); - TransducerModelConfig = new OnlineTransducerModelConfig(); + ModelConfig = new OnlineModelConfig(); DecodingMethod = "greedy_search"; MaxActivePaths = 4; EnableEndpoint = 0; @@ -87,7 +117,7 @@ namespace SherpaOnnx Rule3MinUtteranceLength = 20.0F; } public FeatureConfig FeatConfig; - public OnlineTransducerModelConfig TransducerModelConfig; + public OnlineModelConfig ModelConfig; [MarshalAs(UnmanagedType.LPStr)] public string DecodingMethod;