Fix C# API to support streaming Paraformer (#266)
This commit is contained in:
4
.github/workflows/test-dot-net.yaml
vendored
4
.github/workflows/test-dot-net.yaml
vendored
@@ -63,8 +63,10 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
cd dotnet-examples/
|
||||
|
||||
cd online-decode-files
|
||||
./run.sh
|
||||
./run-transducer.sh
|
||||
./run-paraformer.sh
|
||||
|
||||
cd ../offline-decode-files
|
||||
./run-nemo-ctc.sh
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -55,6 +55,7 @@ sherpa-onnx-zipformer-en-2023-03-30
|
||||
sherpa-onnx-zipformer-en-2023-04-01
|
||||
run-offline-decode-files.sh
|
||||
sherpa-onnx-nemo-ctc-en-citrinet-512
|
||||
sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||
run-offline-decode-files-nemo-ctc.sh
|
||||
*.jar
|
||||
sherpa-onnx-nemo-ctc-*
|
||||
|
||||
@@ -23,15 +23,21 @@ class OnlineDecodeFiles
|
||||
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
||||
public string Provider { get; set; }
|
||||
|
||||
[Option(Required = true, HelpText = "Path to encoder.onnx")]
|
||||
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
||||
public string Encoder { get; set; }
|
||||
|
||||
[Option(Required = true, HelpText = "Path to decoder.onnx")]
|
||||
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
||||
public string Decoder { get; set; }
|
||||
|
||||
[Option(Required = true, HelpText = "Path to joiner.onnx")]
|
||||
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
||||
public string Joiner { get; set; }
|
||||
|
||||
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
||||
public string ParaformerEncoder { get; set; }
|
||||
|
||||
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
||||
public string ParaformerDecoder { get; set; }
|
||||
|
||||
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
||||
public int NumThreads { get; set; }
|
||||
|
||||
@@ -88,6 +94,8 @@ larger than this value. Used only when --enable-endpoint is true.")]
|
||||
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
||||
{
|
||||
string usage = @"
|
||||
(1) Streaming transducer models
|
||||
|
||||
dotnet run \
|
||||
--tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
|
||||
--encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
|
||||
@@ -99,8 +107,20 @@ dotnet run \
|
||||
--files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
|
||||
./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
|
||||
|
||||
(2) Streaming Paraformer models
|
||||
dotnet run \
|
||||
--tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
|
||||
--paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
|
||||
--paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
|
||||
--num-threads=2 \
|
||||
--decoding-method=greedy_search \
|
||||
--debug=false \
|
||||
--files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \
|
||||
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav
|
||||
|
||||
Please refer to
|
||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||
to download pre-trained streaming models.
|
||||
";
|
||||
|
||||
@@ -123,13 +143,17 @@ to download pre-trained streaming models.
|
||||
// You can change it if your model has a different feature dim.
|
||||
config.FeatConfig.FeatureDim = 80;
|
||||
|
||||
config.TransducerModelConfig.Encoder = options.Encoder;
|
||||
config.TransducerModelConfig.Decoder = options.Decoder;
|
||||
config.TransducerModelConfig.Joiner = options.Joiner;
|
||||
config.TransducerModelConfig.Tokens = options.Tokens;
|
||||
config.TransducerModelConfig.Provider = options.Provider;
|
||||
config.TransducerModelConfig.NumThreads = options.NumThreads;
|
||||
config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
|
||||
config.ModelConfig.Transducer.Encoder = options.Encoder;
|
||||
config.ModelConfig.Transducer.Decoder = options.Decoder;
|
||||
config.ModelConfig.Transducer.Joiner = options.Joiner;
|
||||
|
||||
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
|
||||
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
|
||||
|
||||
config.ModelConfig.Tokens = options.Tokens;
|
||||
config.ModelConfig.Provider = options.Provider;
|
||||
config.ModelConfig.NumThreads = options.NumThreads;
|
||||
config.ModelConfig.Debug = options.Debug ? 1 : 0;
|
||||
|
||||
config.DecodingMethod = options.DecodingMethod;
|
||||
config.MaxActivePaths = options.MaxActivePaths;
|
||||
|
||||
20
dotnet-examples/online-decode-files/run-paraformer.sh
Executable file
20
dotnet-examples/online-decode-files/run-paraformer.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
#
# Decodes two test wave files with a streaming Paraformer model using the
# online-decode-files .NET example.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files

# Abort on the first failing command. Without this, a failed `git clone` or
# `cd` would let `git lfs pull` run in the wrong directory and the following
# `cd ..` would leave the example directory before `dotnet run` is invoked.
set -e

if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  # Clone without fetching LFS objects, then pull only the *.onnx files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
  git lfs pull --include "*.onnx"
  cd ..
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  --decoding-method greedy_search \
  --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav
|
||||
@@ -26,15 +26,21 @@ class SpeechRecognitionFromMicrophone
|
||||
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
||||
public string Provider { get; set; }
|
||||
|
||||
[Option(Required = true, HelpText = "Path to encoder.onnx")]
|
||||
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
||||
public string Encoder { get; set; }
|
||||
|
||||
[Option(Required = true, HelpText = "Path to decoder.onnx")]
|
||||
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
||||
public string Decoder { get; set; }
|
||||
|
||||
[Option(Required = true, HelpText = "Path to joiner.onnx")]
|
||||
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
||||
public string Joiner { get; set; }
|
||||
|
||||
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
||||
public string ParaformerEncoder { get; set; }
|
||||
|
||||
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
||||
public string ParaformerDecoder { get; set; }
|
||||
|
||||
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
||||
public int NumThreads { get; set; }
|
||||
|
||||
@@ -87,14 +93,24 @@ larger than this value. Used only when --enable-endpoint is true.")]
|
||||
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
||||
{
|
||||
string usage = @"
|
||||
(1) Streaming transducer models
|
||||
|
||||
dotnet run -c Release \
|
||||
--tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
|
||||
--encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
|
||||
--decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
|
||||
--joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \
|
||||
--joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
|
||||
|
||||
(2) Streaming Paraformer models
|
||||
|
||||
dotnet run \
|
||||
--tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
|
||||
--paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
|
||||
--paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx
|
||||
|
||||
Please refer to
|
||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||
to download pre-trained streaming models.
|
||||
";
|
||||
|
||||
@@ -117,13 +133,17 @@ to download pre-trained streaming models.
|
||||
// You can change it if your model has a different feature dim.
|
||||
config.FeatConfig.FeatureDim = 80;
|
||||
|
||||
config.TransducerModelConfig.Encoder = options.Encoder;
|
||||
config.TransducerModelConfig.Decoder = options.Decoder;
|
||||
config.TransducerModelConfig.Joiner = options.Joiner;
|
||||
config.TransducerModelConfig.Tokens = options.Tokens;
|
||||
config.TransducerModelConfig.Provider = options.Provider;
|
||||
config.TransducerModelConfig.NumThreads = options.NumThreads;
|
||||
config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
|
||||
config.ModelConfig.Transducer.Encoder = options.Encoder;
|
||||
config.ModelConfig.Transducer.Decoder = options.Decoder;
|
||||
config.ModelConfig.Transducer.Joiner = options.Joiner;
|
||||
|
||||
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
|
||||
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
|
||||
|
||||
config.ModelConfig.Tokens = options.Tokens;
|
||||
config.ModelConfig.Provider = options.Provider;
|
||||
config.ModelConfig.NumThreads = options.NumThreads;
|
||||
config.ModelConfig.Debug = options.Debug ? 1 : 0;
|
||||
|
||||
config.DecodingMethod = options.DecodingMethod;
|
||||
config.MaxActivePaths = options.MaxActivePaths;
|
||||
@@ -135,7 +155,6 @@ to download pre-trained streaming models.
|
||||
|
||||
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
||||
|
||||
|
||||
OnlineStream s = recognizer.CreateStream();
|
||||
|
||||
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
||||
@@ -196,7 +215,6 @@ to download pre-trained streaming models.
|
||||
|
||||
stream.Start();
|
||||
|
||||
int segment_index = 0;
|
||||
String lastText = "";
|
||||
int segmentIndex = 0;
|
||||
|
||||
|
||||
17
dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh
Executable file
17
dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
#
# Runs the speech-recognition-from-microphone .NET example with a streaming
# Paraformer model.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files

# Abort on the first failing command. Without this, a failed `git clone` or
# `cd` would let `git lfs pull` run in the wrong directory and the following
# `cd ..` would leave the example directory before `dotnet run` is invoked.
set -e

if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  # Clone without fetching LFS objects, then pull only the *.onnx files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
  git lfs pull --include "*.onnx"
  cd ..
fi

# The last argument must not end with a line-continuation backslash; otherwise
# any line appended to this script would be passed to `dotnet run` as an argument.
dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx
|
||||
@@ -22,12 +22,8 @@ namespace SherpaOnnx
|
||||
Encoder = "";
|
||||
Decoder = "";
|
||||
Joiner = "";
|
||||
Tokens = "";
|
||||
NumThreads = 1;
|
||||
Provider = "cpu";
|
||||
Debug = 0;
|
||||
ModelType = "";
|
||||
}
|
||||
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Encoder;
|
||||
|
||||
@@ -36,6 +32,40 @@ namespace SherpaOnnx
|
||||
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Joiner;
|
||||
}
|
||||
|
||||
/// <summary>
/// Model file paths for a streaming Paraformer model: one encoder and one decoder.
/// </summary>
/// <remarks>
/// The struct uses sequential layout and both strings are marshalled as LPStr.
/// NOTE(review): presumably this mirrors a struct in the native C API, in which
/// case the field order here must match the native declaration - confirm
/// against the C header before reordering or adding fields.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
|
||||
public struct OnlineParaformerModelConfig
|
||||
{
|
||||
/// <summary>
/// Initializes both model paths to empty strings.
/// </summary>
public OnlineParaformerModelConfig()
|
||||
{
|
||||
Encoder = "";
|
||||
Decoder = "";
|
||||
}
|
||||
|
||||
/// <summary>Path to the Paraformer encoder ONNX model file.</summary>
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Encoder;
|
||||
|
||||
/// <summary>Path to the Paraformer decoder ONNX model file.</summary>
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Decoder;
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
public struct OnlineModelConfig
|
||||
{
|
||||
public OnlineModelConfig()
|
||||
{
|
||||
Transducer = new OnlineTransducerModelConfig();
|
||||
Paraformer = new OnlineParaformerModelConfig();
|
||||
Tokens = "";
|
||||
NumThreads = 1;
|
||||
Provider = "cpu";
|
||||
Debug = 0;
|
||||
ModelType = "";
|
||||
}
|
||||
|
||||
public OnlineTransducerModelConfig Transducer;
|
||||
public OnlineParaformerModelConfig Paraformer;
|
||||
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Tokens;
|
||||
@@ -78,7 +108,7 @@ namespace SherpaOnnx
|
||||
public OnlineRecognizerConfig()
|
||||
{
|
||||
FeatConfig = new FeatureConfig();
|
||||
TransducerModelConfig = new OnlineTransducerModelConfig();
|
||||
ModelConfig = new OnlineModelConfig();
|
||||
DecodingMethod = "greedy_search";
|
||||
MaxActivePaths = 4;
|
||||
EnableEndpoint = 0;
|
||||
@@ -87,7 +117,7 @@ namespace SherpaOnnx
|
||||
Rule3MinUtteranceLength = 20.0F;
|
||||
}
|
||||
public FeatureConfig FeatConfig;
|
||||
public OnlineTransducerModelConfig TransducerModelConfig;
|
||||
public OnlineModelConfig ModelConfig;
|
||||
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string DecodingMethod;
|
||||
|
||||
Reference in New Issue
Block a user