Fix C# API to support streaming Paraformer (#266)
This commit is contained in:
.github/workflows/test-dot-net.yaml (vendored, 4 changed lines)
@@ -63,8 +63,10 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd dotnet-examples/
|
cd dotnet-examples/
|
||||||
|
|
||||||
cd online-decode-files
|
cd online-decode-files
|
||||||
./run.sh
|
./run-transducer.sh
|
||||||
|
./run-paraformer.sh
|
||||||
|
|
||||||
cd ../offline-decode-files
|
cd ../offline-decode-files
|
||||||
./run-nemo-ctc.sh
|
./run-nemo-ctc.sh
|
||||||
|
|||||||
.gitignore (vendored, 1 changed line)
@@ -55,6 +55,7 @@ sherpa-onnx-zipformer-en-2023-03-30
|
|||||||
sherpa-onnx-zipformer-en-2023-04-01
|
sherpa-onnx-zipformer-en-2023-04-01
|
||||||
run-offline-decode-files.sh
|
run-offline-decode-files.sh
|
||||||
sherpa-onnx-nemo-ctc-en-citrinet-512
|
sherpa-onnx-nemo-ctc-en-citrinet-512
|
||||||
|
sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
run-offline-decode-files-nemo-ctc.sh
|
run-offline-decode-files-nemo-ctc.sh
|
||||||
*.jar
|
*.jar
|
||||||
sherpa-onnx-nemo-ctc-*
|
sherpa-onnx-nemo-ctc-*
|
||||||
|
|||||||
@@ -23,15 +23,21 @@ class OnlineDecodeFiles
|
|||||||
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
||||||
public string Provider { get; set; }
|
public string Provider { get; set; }
|
||||||
|
|
||||||
[Option(Required = true, HelpText = "Path to encoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
||||||
public string Encoder { get; set; }
|
public string Encoder { get; set; }
|
||||||
|
|
||||||
[Option(Required = true, HelpText = "Path to decoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
||||||
public string Decoder { get; set; }
|
public string Decoder { get; set; }
|
||||||
|
|
||||||
[Option(Required = true, HelpText = "Path to joiner.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
||||||
public string Joiner { get; set; }
|
public string Joiner { get; set; }
|
||||||
|
|
||||||
|
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
||||||
|
public string ParaformerEncoder { get; set; }
|
||||||
|
|
||||||
|
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
||||||
|
public string ParaformerDecoder { get; set; }
|
||||||
|
|
||||||
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
||||||
public int NumThreads { get; set; }
|
public int NumThreads { get; set; }
|
||||||
|
|
||||||
@@ -88,6 +94,8 @@ larger than this value. Used only when --enable-endpoint is true.")]
|
|||||||
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
||||||
{
|
{
|
||||||
string usage = @"
|
string usage = @"
|
||||||
|
(1) Streaming transducer models
|
||||||
|
|
||||||
dotnet run \
|
dotnet run \
|
||||||
--tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
|
--tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
|
||||||
--encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
|
--encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
|
||||||
@@ -99,8 +107,20 @@ dotnet run \
|
|||||||
--files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
|
--files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
|
||||||
./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
|
./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
|
||||||
|
|
||||||
|
(2) Streaming Paraformer models
|
||||||
|
dotnet run \
|
||||||
|
--tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
|
||||||
|
--paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
|
||||||
|
--paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
|
||||||
|
--num-threads=2 \
|
||||||
|
--decoding-method=greedy_search \
|
||||||
|
--debug=false \
|
||||||
|
--files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \
|
||||||
|
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav
|
||||||
|
|
||||||
Please refer to
|
Please refer to
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
||||||
|
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||||
to download pre-trained streaming models.
|
to download pre-trained streaming models.
|
||||||
";
|
";
|
||||||
|
|
||||||
@@ -123,13 +143,17 @@ to download pre-trained streaming models.
|
|||||||
// You can change it if your model has a different feature dim.
|
// You can change it if your model has a different feature dim.
|
||||||
config.FeatConfig.FeatureDim = 80;
|
config.FeatConfig.FeatureDim = 80;
|
||||||
|
|
||||||
config.TransducerModelConfig.Encoder = options.Encoder;
|
config.ModelConfig.Transducer.Encoder = options.Encoder;
|
||||||
config.TransducerModelConfig.Decoder = options.Decoder;
|
config.ModelConfig.Transducer.Decoder = options.Decoder;
|
||||||
config.TransducerModelConfig.Joiner = options.Joiner;
|
config.ModelConfig.Transducer.Joiner = options.Joiner;
|
||||||
config.TransducerModelConfig.Tokens = options.Tokens;
|
|
||||||
config.TransducerModelConfig.Provider = options.Provider;
|
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
|
||||||
config.TransducerModelConfig.NumThreads = options.NumThreads;
|
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
|
||||||
config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
|
|
||||||
|
config.ModelConfig.Tokens = options.Tokens;
|
||||||
|
config.ModelConfig.Provider = options.Provider;
|
||||||
|
config.ModelConfig.NumThreads = options.NumThreads;
|
||||||
|
config.ModelConfig.Debug = options.Debug ? 1 : 0;
|
||||||
|
|
||||||
config.DecodingMethod = options.DecodingMethod;
|
config.DecodingMethod = options.DecodingMethod;
|
||||||
config.MaxActivePaths = options.MaxActivePaths;
|
config.MaxActivePaths = options.MaxActivePaths;
|
||||||
|
|||||||
dotnet-examples/online-decode-files/run-paraformer.sh (new executable file, 20 lines)
@@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Please refer to
|
||||||
|
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
|
||||||
|
# to download the model files
|
||||||
|
|
||||||
|
if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
|
cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
|
git lfs pull --include "*.onnx"
|
||||||
|
cd ..
|
||||||
|
fi
|
||||||
|
|
||||||
|
dotnet run -c Release \
|
||||||
|
--tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
|
||||||
|
--paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
|
||||||
|
--paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
|
||||||
|
--decoding-method greedy_search \
|
||||||
|
--files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav \
|
||||||
|
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav
|
||||||
@@ -26,15 +26,21 @@ class SpeechRecognitionFromMicrophone
|
|||||||
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
|
||||||
public string Provider { get; set; }
|
public string Provider { get; set; }
|
||||||
|
|
||||||
[Option(Required = true, HelpText = "Path to encoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
|
||||||
public string Encoder { get; set; }
|
public string Encoder { get; set; }
|
||||||
|
|
||||||
[Option(Required = true, HelpText = "Path to decoder.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
|
||||||
public string Decoder { get; set; }
|
public string Decoder { get; set; }
|
||||||
|
|
||||||
[Option(Required = true, HelpText = "Path to joiner.onnx")]
|
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
|
||||||
public string Joiner { get; set; }
|
public string Joiner { get; set; }
|
||||||
|
|
||||||
|
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
|
||||||
|
public string ParaformerEncoder { get; set; }
|
||||||
|
|
||||||
|
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
|
||||||
|
public string ParaformerDecoder { get; set; }
|
||||||
|
|
||||||
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
|
||||||
public int NumThreads { get; set; }
|
public int NumThreads { get; set; }
|
||||||
|
|
||||||
@@ -87,14 +93,24 @@ larger than this value. Used only when --enable-endpoint is true.")]
|
|||||||
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
|
||||||
{
|
{
|
||||||
string usage = @"
|
string usage = @"
|
||||||
|
(1) Streaming transducer models
|
||||||
|
|
||||||
dotnet run -c Release \
|
dotnet run -c Release \
|
||||||
--tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
|
--tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
|
||||||
--encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
|
--encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
|
||||||
--decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
|
--decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
|
||||||
--joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \
|
--joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
|
||||||
|
|
||||||
|
(2) Streaming Paraformer models
|
||||||
|
|
||||||
|
dotnet run \
|
||||||
|
--tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
|
||||||
|
--paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
|
||||||
|
--paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx
|
||||||
|
|
||||||
Please refer to
|
Please refer to
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
||||||
|
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||||
to download pre-trained streaming models.
|
to download pre-trained streaming models.
|
||||||
";
|
";
|
||||||
|
|
||||||
@@ -117,13 +133,17 @@ to download pre-trained streaming models.
|
|||||||
// You can change it if your model has a different feature dim.
|
// You can change it if your model has a different feature dim.
|
||||||
config.FeatConfig.FeatureDim = 80;
|
config.FeatConfig.FeatureDim = 80;
|
||||||
|
|
||||||
config.TransducerModelConfig.Encoder = options.Encoder;
|
config.ModelConfig.Transducer.Encoder = options.Encoder;
|
||||||
config.TransducerModelConfig.Decoder = options.Decoder;
|
config.ModelConfig.Transducer.Decoder = options.Decoder;
|
||||||
config.TransducerModelConfig.Joiner = options.Joiner;
|
config.ModelConfig.Transducer.Joiner = options.Joiner;
|
||||||
config.TransducerModelConfig.Tokens = options.Tokens;
|
|
||||||
config.TransducerModelConfig.Provider = options.Provider;
|
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
|
||||||
config.TransducerModelConfig.NumThreads = options.NumThreads;
|
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
|
||||||
config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
|
|
||||||
|
config.ModelConfig.Tokens = options.Tokens;
|
||||||
|
config.ModelConfig.Provider = options.Provider;
|
||||||
|
config.ModelConfig.NumThreads = options.NumThreads;
|
||||||
|
config.ModelConfig.Debug = options.Debug ? 1 : 0;
|
||||||
|
|
||||||
config.DecodingMethod = options.DecodingMethod;
|
config.DecodingMethod = options.DecodingMethod;
|
||||||
config.MaxActivePaths = options.MaxActivePaths;
|
config.MaxActivePaths = options.MaxActivePaths;
|
||||||
@@ -135,7 +155,6 @@ to download pre-trained streaming models.
|
|||||||
|
|
||||||
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
||||||
|
|
||||||
|
|
||||||
OnlineStream s = recognizer.CreateStream();
|
OnlineStream s = recognizer.CreateStream();
|
||||||
|
|
||||||
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
Console.WriteLine(PortAudio.VersionInfo.versionText);
|
||||||
@@ -196,7 +215,6 @@ to download pre-trained streaming models.
|
|||||||
|
|
||||||
stream.Start();
|
stream.Start();
|
||||||
|
|
||||||
int segment_index = 0;
|
|
||||||
String lastText = "";
|
String lastText = "";
|
||||||
int segmentIndex = 0;
|
int segmentIndex = 0;
|
||||||
|
|
||||||
|
|||||||
dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh (new executable file, 17 lines)
@@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Please refer to
|
||||||
|
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
|
||||||
|
# to download the model files
|
||||||
|
|
||||||
|
if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
|
cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
|
git lfs pull --include "*.onnx"
|
||||||
|
cd ..
|
||||||
|
fi
|
||||||
|
|
||||||
|
dotnet run -c Release \
|
||||||
|
--tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
|
||||||
|
--paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
|
||||||
|
--paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
|
||||||
@@ -22,12 +22,8 @@ namespace SherpaOnnx
|
|||||||
Encoder = "";
|
Encoder = "";
|
||||||
Decoder = "";
|
Decoder = "";
|
||||||
Joiner = "";
|
Joiner = "";
|
||||||
Tokens = "";
|
|
||||||
NumThreads = 1;
|
|
||||||
Provider = "cpu";
|
|
||||||
Debug = 0;
|
|
||||||
ModelType = "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[MarshalAs(UnmanagedType.LPStr)]
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
public string Encoder;
|
public string Encoder;
|
||||||
|
|
||||||
@@ -36,6 +32,40 @@ namespace SherpaOnnx
|
|||||||
|
|
||||||
[MarshalAs(UnmanagedType.LPStr)]
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
public string Joiner;
|
public string Joiner;
|
||||||
|
}
|
||||||
|
|
||||||
|
[StructLayout(LayoutKind.Sequential)]
|
||||||
|
public struct OnlineParaformerModelConfig
|
||||||
|
{
|
||||||
|
public OnlineParaformerModelConfig()
|
||||||
|
{
|
||||||
|
Encoder = "";
|
||||||
|
Decoder = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
|
public string Encoder;
|
||||||
|
|
||||||
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
|
public string Decoder;
|
||||||
|
}
|
||||||
|
|
||||||
|
[StructLayout(LayoutKind.Sequential)]
|
||||||
|
public struct OnlineModelConfig
|
||||||
|
{
|
||||||
|
public OnlineModelConfig()
|
||||||
|
{
|
||||||
|
Transducer = new OnlineTransducerModelConfig();
|
||||||
|
Paraformer = new OnlineParaformerModelConfig();
|
||||||
|
Tokens = "";
|
||||||
|
NumThreads = 1;
|
||||||
|
Provider = "cpu";
|
||||||
|
Debug = 0;
|
||||||
|
ModelType = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public OnlineTransducerModelConfig Transducer;
|
||||||
|
public OnlineParaformerModelConfig Paraformer;
|
||||||
|
|
||||||
[MarshalAs(UnmanagedType.LPStr)]
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
public string Tokens;
|
public string Tokens;
|
||||||
@@ -78,7 +108,7 @@ namespace SherpaOnnx
|
|||||||
public OnlineRecognizerConfig()
|
public OnlineRecognizerConfig()
|
||||||
{
|
{
|
||||||
FeatConfig = new FeatureConfig();
|
FeatConfig = new FeatureConfig();
|
||||||
TransducerModelConfig = new OnlineTransducerModelConfig();
|
ModelConfig = new OnlineModelConfig();
|
||||||
DecodingMethod = "greedy_search";
|
DecodingMethod = "greedy_search";
|
||||||
MaxActivePaths = 4;
|
MaxActivePaths = 4;
|
||||||
EnableEndpoint = 0;
|
EnableEndpoint = 0;
|
||||||
@@ -87,7 +117,7 @@ namespace SherpaOnnx
|
|||||||
Rule3MinUtteranceLength = 20.0F;
|
Rule3MinUtteranceLength = 20.0F;
|
||||||
}
|
}
|
||||||
public FeatureConfig FeatConfig;
|
public FeatureConfig FeatConfig;
|
||||||
public OnlineTransducerModelConfig TransducerModelConfig;
|
public OnlineModelConfig ModelConfig;
|
||||||
|
|
||||||
[MarshalAs(UnmanagedType.LPStr)]
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
public string DecodingMethod;
|
public string DecodingMethod;
|
||||||
|
|||||||
Reference in New Issue
Block a user