diff --git a/.github/workflows/go.yaml b/.github/workflows/go.yaml index c4bad36d..f0e34fe4 100644 --- a/.github/workflows/go.yaml +++ b/.github/workflows/go.yaml @@ -67,7 +67,7 @@ jobs: ls -lh go mod tidy cat go.mod - go build -x + go build ls -lh git lfs install @@ -87,6 +87,19 @@ jobs: ./run-nemo-ctc.sh rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium + echo "Test Whisper tiny.en" + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en + cd sherpa-onnx-whisper-tiny.en + git lfs pull --include "*.onnx" + cd .. + ./run-whisper.sh + rm -rf sherpa-onnx-whisper-tiny.en + + echo "Test Tdnn yesno" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno + ./run-tdnn-yesno.sh + rm -rf sherpa-onnx-tdnn-yesno + - name: Test non-streaming decoding files (Win64) if: matrix.os == 'windows-latest' && matrix.arch == 'x64' shell: bash @@ -121,6 +134,19 @@ jobs: ./run-nemo-ctc.sh rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium + echo "Test Whisper tiny.en" + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en + cd sherpa-onnx-whisper-tiny.en + git lfs pull --include "*.onnx" + cd .. + ./run-whisper.sh + rm -rf sherpa-onnx-whisper-tiny.en + + echo "Test Tdnn yesno" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno + ./run-tdnn-yesno.sh + rm -rf sherpa-onnx-tdnn-yesno + - name: Test non-streaming decoding files (Win32) if: matrix.os == 'windows-latest' && matrix.arch == 'x86' shell: bash @@ -139,7 +165,7 @@ jobs: go env go clean - go build -x + go build echo $PWD ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ @@ -163,6 +189,19 @@ jobs: ./run-nemo-ctc.sh rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium + echo "Test Whisper tiny.en" + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en + cd sherpa-onnx-whisper-tiny.en + git lfs pull --include "*.onnx" + cd .. 
+ ./run-whisper.sh + rm -rf sherpa-onnx-whisper-tiny.en + + echo "Test Tdnn yesno" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno + ./run-tdnn-yesno.sh + rm -rf sherpa-onnx-tdnn-yesno + - name: Test streaming decoding files (Linux/macOS) if: matrix.os != 'windows-latest' shell: bash @@ -171,7 +210,7 @@ jobs: ls -lh go mod tidy cat go.mod - go build -x + go build ls -lh git lfs install @@ -233,7 +272,7 @@ jobs: go env go clean - go build -x + go build echo $PWD ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index 7f523cd2..bd688015 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -72,3 +72,5 @@ jobs: ./run-nemo-ctc.sh ./run-paraformer.sh ./run-zipformer.sh + ./run-whisper.sh + ./run-tdnn-yesno.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ca4fbeb..93c00af4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.7.7") +set(SHERPA_ONNX_VERSION "1.7.8") # Disable warning about # diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs index 7b729252..9cda985e 100644 --- a/dotnet-examples/offline-decode-files/Program.cs +++ b/dotnet-examples/offline-decode-files/Program.cs @@ -15,18 +15,35 @@ class OfflineDecodeFiles { class Options { + + [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] + public int SampleRate { get; set; } + + [Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")] + public int FeatureDim { get; set; } + [Option(Required = false, HelpText = "Path to tokens.txt")] public string Tokens { get; set; } - [Option(Required = false, HelpText = "Path to encoder.onnx. 
Used only for transducer models")] + [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] public string Encoder { get; set; } - [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")] + [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] public string Decoder { get; set; } - [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")] + [Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] public string Joiner { get; set; } + [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] + public string WhisperEncoder { get; set; } + + [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] + public string WhisperDecoder { get; set; } + + [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] + public string TdnnModel { get; set; } + + [Option(Required = false, HelpText = "Path to model.onnx. 
Used only for paraformer models")] public string Paraformer { get; set; } @@ -105,6 +122,38 @@ dotnet run \ Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html to download pre-trained paraformer models + +# Whisper + +dotnet run \ + --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \ + --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \ + --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ + --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \ + ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \ + ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav + +Please refer to +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html +to download pre-trained whisper models. + +# Tdnn yesno + +dotnet run \ + --sample-rate=8000 \ + --feat-dim=23 \ + --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \ + --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \ + --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav + +Please refer to +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html +to download pre-trained Tdnn models. 
"; var helpText = HelpText.AutoBuild(result, h => @@ -120,6 +169,9 @@ to download pre-trained paraformer models private static void Run(Options options) { OfflineRecognizerConfig config = new OfflineRecognizerConfig(); + config.FeatConfig.SampleRate = options.SampleRate; + config.FeatConfig.FeatureDim = options.FeatureDim; + config.ModelConfig.Tokens = options.Tokens; if (!String.IsNullOrEmpty(options.Encoder)) @@ -137,6 +189,15 @@ to download pre-trained paraformer models { config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; } + else if (!String.IsNullOrEmpty(options.WhisperEncoder)) + { + config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; + config.ModelConfig.Whisper.Decoder = options.WhisperDecoder; + } + else if (!String.IsNullOrEmpty(options.TdnnModel)) + { + config.ModelConfig.Tdnn.Model = options.TdnnModel; + } else { Console.WriteLine("Please provide a model"); diff --git a/dotnet-examples/offline-decode-files/run-tdnn-yesno.sh b/dotnet-examples/offline-decode-files/run-tdnn-yesno.sh new file mode 100755 index 00000000..32d4a487 --- /dev/null +++ b/dotnet-examples/offline-decode-files/run-tdnn-yesno.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +if [ ! -d ./sherpa-onnx-tdnn-yesno ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno + cd sherpa-onnx-tdnn-yesno + git lfs pull --include "*.onnx" + cd .. 
+fi + +dotnet run \ + --sample-rate=8000 \ + --feat-dim=23 \ + --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \ + --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \ + --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav diff --git a/dotnet-examples/offline-decode-files/run-whisper.sh b/dotnet-examples/offline-decode-files/run-whisper.sh new file mode 100755 index 00000000..8ac45282 --- /dev/null +++ b/dotnet-examples/offline-decode-files/run-whisper.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en + cd sherpa-onnx-whisper-tiny.en + git lfs pull --include "*.onnx" + cd .. 
+fi + +dotnet run \ + --num-threads=2 \ + --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \ + --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \ + --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ + --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \ + ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \ + ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go index 4a8a53e3..651b06e1 100644 --- a/go-api-examples/non-streaming-decode-files/main.go +++ b/go-api-examples/non-streaming-decode-files/main.go @@ -15,13 +15,23 @@ func main() { log.SetFlags(log.LstdFlags | log.Lmicroseconds) config := sherpa.OfflineRecognizerConfig{} - config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} - flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model") - flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model") + flag.IntVar(&config.FeatConfig.SampleRate, "sample-rate", 16000, "Sample rate of the data used to train the model") + flag.IntVar(&config.FeatConfig.FeatureDim, "feat-dim", 80, "Dimension of the features used to train the model") + + flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the transducer encoder model") + flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the transducer decoder model") flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model") + flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model") + flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") + + flag.StringVar(&config.ModelConfig.Whisper.Encoder, "whisper-encoder", "", "Path to the whisper encoder model") + flag.StringVar(&config.ModelConfig.Whisper.Decoder, 
"whisper-decoder", "", "Path to the whisper decoder model") + + flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model") + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") diff --git a/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh b/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh index b895437f..8655e5b6 100755 --- a/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh +++ b/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh @@ -5,7 +5,7 @@ # to download the model # before you run this script. # -# You can switch to a different online model if you need +# You can switch to a different offline model if you need ./non-streaming-decode-files \ --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ diff --git a/go-api-examples/non-streaming-decode-files/run-paraformer.sh b/go-api-examples/non-streaming-decode-files/run-paraformer.sh index 2d1658c2..191a6a41 100755 --- a/go-api-examples/non-streaming-decode-files/run-paraformer.sh +++ b/go-api-examples/non-streaming-decode-files/run-paraformer.sh @@ -5,7 +5,6 @@ # to download the model # before you run this script. 
# -# You can switch to a different online model if you need ./non-streaming-decode-files \ --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ diff --git a/go-api-examples/non-streaming-decode-files/run-tdnn-yesno.sh b/go-api-examples/non-streaming-decode-files/run-tdnn-yesno.sh new file mode 100755 index 00000000..4253a362 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-tdnn-yesno.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html +# to download the model +# before you run this script. +# + +./non-streaming-decode-files \ + --sample-rate=8000 \ + --feat-dim=23 \ + --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \ + --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \ + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav diff --git a/go-api-examples/non-streaming-decode-files/run-transducer.sh b/go-api-examples/non-streaming-decode-files/run-transducer.sh index 74837089..cec6c84f 100755 --- a/go-api-examples/non-streaming-decode-files/run-transducer.sh +++ b/go-api-examples/non-streaming-decode-files/run-transducer.sh @@ -5,7 +5,7 @@ # to download the model # before you run this script. # -# You can switch to a different online model if you need +# You can switch to a different offline model if you need ./non-streaming-decode-files \ --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \ diff --git a/go-api-examples/non-streaming-decode-files/run-whisper.sh b/go-api-examples/non-streaming-decode-files/run-whisper.sh new file mode 100755 index 00000000..cf723418 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-whisper.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html +# to download the model +# before you run this script. 
+# +# You can switch to a different offline model if you need + +./non-streaming-decode-files \ + --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \ + --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \ + --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ + ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav + diff --git a/scripts/dotnet/offline.cs b/scripts/dotnet/offline.cs index 60c1279e..c068bfb8 100644 --- a/scripts/dotnet/offline.cs +++ b/scripts/dotnet/offline.cs @@ -51,6 +51,32 @@ namespace SherpaOnnx public string Model; } + [StructLayout(LayoutKind.Sequential)] + public struct OfflineWhisperModelConfig + { + public OfflineWhisperModelConfig() + { + Encoder = ""; + Decoder = ""; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Encoder; + + [MarshalAs(UnmanagedType.LPStr)] + public string Decoder; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineTdnnModelConfig + { + public OfflineTdnnModelConfig() + { + Model = ""; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Model; + } + [StructLayout(LayoutKind.Sequential)] public struct OfflineLMConfig { @@ -73,6 +99,8 @@ namespace SherpaOnnx Transducer = new OfflineTransducerModelConfig(); Paraformer = new OfflineParaformerModelConfig(); NeMoCtc = new OfflineNemoEncDecCtcModelConfig(); + Whisper = new OfflineWhisperModelConfig(); + Tdnn = new OfflineTdnnModelConfig(); Tokens = ""; NumThreads = 1; Debug = 0; @@ -82,6 +110,8 @@ namespace SherpaOnnx public OfflineTransducerModelConfig Transducer; public OfflineParaformerModelConfig Paraformer; public OfflineNemoEncDecCtcModelConfig NeMoCtc; + public OfflineWhisperModelConfig Whisper; + public OfflineTdnnModelConfig Tdnn; [MarshalAs(UnmanagedType.LPStr)] public string Tokens; diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index a5ec4b52..f4fe6998 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -309,6 +309,15 @@ type OfflineNemoEncDecCtcModelConfig 
struct { Model string // Path to the model, e.g., model.onnx or model.int8.onnx } +type OfflineWhisperModelConfig struct { + Encoder string // Path to the whisper encoder model + Decoder string // Path to the whisper decoder model +} + +type OfflineTdnnModelConfig struct { + Model string // Path to the tdnn model +} + // Configuration for offline LM. type OfflineLMConfig struct { Model string // Path to the model @@ -319,6 +328,8 @@ type OfflineModelConfig struct { Transducer OfflineTransducerModelConfig Paraformer OfflineParaformerModelConfig NemoCTC OfflineNemoEncDecCtcModelConfig + Whisper OfflineWhisperModelConfig + Tdnn OfflineTdnnModelConfig Tokens string // Path to tokens.txt // Number of threads to use for neural network computation @@ -390,6 +401,15 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model) defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) + c.model_config.whisper.encoder = C.CString(config.ModelConfig.Whisper.Encoder) + defer C.free(unsafe.Pointer(c.model_config.whisper.encoder)) + + c.model_config.whisper.decoder = C.CString(config.ModelConfig.Whisper.Decoder) + defer C.free(unsafe.Pointer(c.model_config.whisper.decoder)) + + c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model) + defer C.free(unsafe.Pointer(c.model_config.tdnn.model)) + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) defer C.free(unsafe.Pointer(c.model_config.tokens)) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 9d1f3919..cc433d82 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -271,6 +271,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( recognizer_config.model_config.whisper.decoder = SHERPA_ONNX_OR(config->model_config.whisper.decoder, ""); + recognizer_config.model_config.tdnn.model = + SHERPA_ONNX_OR(config->model_config.tdnn.model, ""); + recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, ""); 
recognizer_config.model_config.num_threads = diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 5bbd9fe2..d669bce2 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -305,6 +305,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { const char *decoder; } SherpaOnnxOfflineWhisperModelConfig; +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTdnnModelConfig { + const char *model; +} SherpaOnnxOfflineTdnnModelConfig; + SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig { const char *model; float scale; @@ -315,6 +319,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { SherpaOnnxOfflineParaformerModelConfig paraformer; SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc; SherpaOnnxOfflineWhisperModelConfig whisper; + SherpaOnnxOfflineTdnnModelConfig tdnn; const char *tokens; int32_t num_threads;