diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index 84516254..a4559f7b 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -2,7 +2,10 @@ cd dotnet-examples/ -cd ./online-decode-files +cd ./keyword-spotting-from-files +./run.sh + +cd ../online-decode-files ./run-transducer-itn.sh ./run-zipformer2-ctc.sh ./run-transducer.sh diff --git a/.github/scripts/test-offline-transducer.sh b/.github/scripts/test-offline-transducer.sh index eadc0f49..ee012de3 100755 --- a/.github/scripts/test-offline-transducer.sh +++ b/.github/scripts/test-offline-transducer.sh @@ -139,7 +139,7 @@ time $EXE \ time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $repo/test_wavs/0.wav \ @@ -172,7 +172,7 @@ time $EXE \ time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $repo/test_wavs/0.wav \ diff --git a/.github/scripts/test-online-transducer.sh b/.github/scripts/test-online-transducer.sh index 7616b18e..ceb2be47 100755 --- a/.github/scripts/test-online-transducer.sh +++ b/.github/scripts/test-online-transducer.sh @@ -86,7 +86,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -126,7 +126,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-11-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-11-avg-1.int8.onnx \ + 
--decoder=$repo/decoder-epoch-11-avg-1.onnx \ --joiner=$repo/joiner-epoch-11-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -168,7 +168,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -210,7 +210,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -231,7 +231,7 @@ if [ $EXE == "sherpa-onnx-ffmpeg" ]; then time $EXE \ $repo/tokens.txt \ $repo/encoder-epoch-99-avg-1.int8.onnx \ - $repo/decoder-epoch-99-avg-1.int8.onnx \ + $repo/decoder-epoch-99-avg-1.onnx \ $repo/joiner-epoch-99-avg-1.int8.onnx \ https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \ 2 @@ -271,7 +271,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh index 7bc7f0df..68104baa 100755 --- a/.github/scripts/test-python.sh +++ b/.github/scripts/test-python.sh @@ -125,12 +125,15 @@ for name in ${wenet_models[@]}; do repo=$name log "Start testing ${repo_url}" - python3 ./python-api-examples/offline-decode-files.py \ - --tokens=$repo/tokens.txt \ - --wenet-ctc=$repo/model.onnx \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/8k.wav + if false; then + # offline wenet ctc models are not supported by onnxruntime 
>= 1.18 + python3 ./python-api-examples/offline-decode-files.py \ + --tokens=$repo/tokens.txt \ + --wenet-ctc=$repo/model.onnx \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/8k.wav + fi python3 ./python-api-examples/online-decode-files.py \ --tokens=$repo/tokens.txt \ diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f8c76d6..6672c41b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 1.10.13 * Update onnxruntime from 1.17.1 to 1.18.0 +* Add C# API for Keyword spotting ## 1.10.12 diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d4d20d1..203b8a56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,11 +21,6 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") cmake_policy(SET CMP0135 NEW) endif() - - - - - option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF) option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF) option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" OFF) diff --git a/dotnet-examples/keyword-spotting-from-files/Program.cs b/dotnet-examples/keyword-spotting-from-files/Program.cs new file mode 100644 index 00000000..2fea260d --- /dev/null +++ b/dotnet-examples/keyword-spotting-from-files/Program.cs @@ -0,0 +1,99 @@ +// Copyright (c) 2024 Xiaomi Corporation +// +// This file shows how to do keyword spotting with sherpa-onnx. +// +// 1. Download a model from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models +// +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +// +// 2. 
Now run it +// +// dotnet run + +using SherpaOnnx; +using System.Collections.Generic; +using System; + +class KeywordSpotterDemo +{ + static void Main(string[] args) + { + var config = new KeywordSpotterConfig(); + config.FeatConfig.SampleRate = 16000; + config.FeatConfig.FeatureDim = 80; + + config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"; + config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"; + config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"; + + config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"; + config.ModelConfig.Provider = "cpu"; + config.ModelConfig.NumThreads = 1; + config.ModelConfig.Debug = 1; + config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"; + + var kws = new KeywordSpotter(config); + + var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; + + WaveReader waveReader = new WaveReader(filename); + + Console.WriteLine("----------Use pre-defined keywords----------"); + + OnlineStream s = kws.CreateStream(); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + s.InputFinished(); + + while (kws.IsReady(s)) + { + kws.Decode(s); + var result = kws.GetResult(s); + if (result.Keyword != "") + { + Console.WriteLine("Detected: {0}", result.Keyword); + } + } + + Console.WriteLine("----------Use pre-defined keywords + add a new keyword----------"); + s = kws.CreateStream("y ǎn y uán @演员"); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + 
s.InputFinished(); + + while (kws.IsReady(s)) + { + kws.Decode(s); + var result = kws.GetResult(s); + if (result.Keyword != "") + { + Console.WriteLine("Detected: {0}", result.Keyword); + } + } + + Console.WriteLine("----------Use pre-defined keywords + add 2 new keywords----------"); + + // Note keywords are separated by / + s = kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名"); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + s.InputFinished(); + + while (kws.IsReady(s)) + { + kws.Decode(s); + var result = kws.GetResult(s); + if (result.Keyword != "") + { + Console.WriteLine("Detected: {0}", result.Keyword); + } + } + } +} + diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj new file mode 100644 index 00000000..992f8e0e --- /dev/null +++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj @@ -0,0 +1,15 @@ + + + + Exe + net6.0 + keyword_spotting_from_files + enable + enable + + + + + + + diff --git a/dotnet-examples/keyword-spotting-from-files/run.sh b/dotnet-examples/keyword-spotting-from-files/run.sh new file mode 100755 index 00000000..1f07b9fa --- /dev/null +++ b/dotnet-examples/keyword-spotting-from-files/run.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! 
-f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +fi + +dotnet run -c Release diff --git a/dotnet-examples/online-decode-files/run-transducer-itn.sh b/dotnet-examples/online-decode-files/run-transducer-itn.sh index 0c81fc7d..de3445da 100755 --- a/dotnet-examples/online-decode-files/run-transducer-itn.sh +++ b/dotnet-examples/online-decode-files/run-transducer-itn.sh @@ -22,7 +22,7 @@ fi dotnet run -c Release \ --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ --rule-fsts ./itn_zh_number.fst \ --decoding-method greedy_search \ diff --git a/dotnet-examples/online-decode-files/run-transducer.sh b/dotnet-examples/online-decode-files/run-transducer.sh index b3ca7c7c..82435429 100755 --- a/dotnet-examples/online-decode-files/run-transducer.sh +++ b/dotnet-examples/online-decode-files/run-transducer.sh @@ -14,7 +14,7 @@ fi dotnet run -c Release \ --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --decoder 
./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ --decoding-method greedy_search \ --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \ diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln index d844c503..b0d2e56c 100644 --- a/dotnet-examples/sherpa-onnx.sln +++ b/dotnet-examples/sherpa-onnx.sln @@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraf EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -75,6 +77,10 @@ Global {401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh b/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh index e6184b4f..ecafb292 100755 --- a/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh +++ 
b/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh @@ -18,5 +18,5 @@ fi dotnet run -c Release \ --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md index f7e9fed4..02b0e22f 100644 --- a/flutter/sherpa_onnx/example/example.md +++ b/flutter/sherpa_onnx/example/example.md @@ -5,6 +5,7 @@ | Functions | URL | Supported Platforms| |---|---|---| |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows| +|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows| ## Pure dart-examples diff --git a/scripts/dotnet/.gitignore b/scripts/dotnet/.gitignore index 6d09be53..4dda52bb 100644 --- a/scripts/dotnet/.gitignore +++ b/scripts/dotnet/.gitignore @@ -1,8 +1,10 @@ all macos-arm64 macos-x64 -linux -windows +linux-x64 +linux-arm64 +windows-arm64 windows-x64 windows-x86 packages +tmp diff --git a/scripts/dotnet/KeywordResult.cs b/scripts/dotnet/KeywordResult.cs new file mode 100644 index 00000000..13ed4e79 --- /dev/null +++ b/scripts/dotnet/KeywordResult.cs @@ -0,0 +1,44 @@ +/// Copyright (c) 2024 Xiaomi Corporation + +using System; +using System.Runtime.InteropServices; +using System.Text; + +namespace SherpaOnnx +{ + public class KeywordResult + { + public KeywordResult(IntPtr handle) + { + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); + + 
// PtrToStringUTF8() requires .net standard 2.1
+            // _keyword = Marshal.PtrToStringUTF8(impl.Keyword);
+
+            int length = 0;
+
+            unsafe
+            {
+                byte* buffer = (byte*)impl.Keyword;
+                while (*buffer != 0)
+                {
+                    ++buffer;
+                    length += 1;
+                }
+            }
+
+            byte[] stringBuffer = new byte[length];
+            Marshal.Copy(impl.Keyword, stringBuffer, 0, length);
+            _keyword = Encoding.UTF8.GetString(stringBuffer);
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        struct Impl
+        {
+            public IntPtr Keyword;
+        }
+
+        private String _keyword;
+        public String Keyword => _keyword;
+    }
+}
diff --git a/scripts/dotnet/KeywordSpotter.cs b/scripts/dotnet/KeywordSpotter.cs
new file mode 100644
index 00000000..fc80e31b
--- /dev/null
+++ b/scripts/dotnet/KeywordSpotter.cs
@@ -0,0 +1,128 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace SherpaOnnx
+{
+    // please see
+    // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
+    // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources
+    public class KeywordSpotter : IDisposable
+    {
+        /// Create a keyword spotter from the given config.
+        public KeywordSpotter(KeywordSpotterConfig config)
+        {
+            IntPtr h = CreateKeywordSpotter(ref config);
+            _handle = new HandleRef(this, h);
+        }
+
+        /// Create a stream for the spotter's pre-defined keywords.
+        public OnlineStream CreateStream()
+        {
+            IntPtr p = CreateKeywordStream(_handle.Handle);
+            return new OnlineStream(p);
+        }
+
+        /// Create a stream that adds the given keywords to the pre-defined
+        /// ones. Multiple keywords are separated by '/'.
+        public OnlineStream CreateStream(string keywords)
+        {
+            // No length is passed to the native function, so the buffer must
+            // be NUL-terminated. GetBytes() does not append a terminator; the
+            // zero-filled extra byte of the new array serves as the NUL.
+            byte[] utf8Bytes = Encoding.UTF8.GetBytes(keywords);
+            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1];
+            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
+            IntPtr p = CreateKeywordStreamWithKeywords(_handle.Handle, utf8BytesWithNull);
+            return new OnlineStream(p);
+        }
+
+        /// Return true if the passed stream is ready for decoding.
+        public bool IsReady(OnlineStream stream)
+        {
+            return IsReady(_handle.Handle, stream.Handle) != 0;
+        }
+
+        /// You have to ensure that IsReady(stream) returns true before
+        /// you call this method
+        public void Decode(OnlineStream stream)
+        {
+            Decode(_handle.Handle, stream.Handle);
+        }
+
+        // The caller should ensure all passed streams are ready for decoding.
+        public void Decode(IEnumerable<OnlineStream> streams)
+        {
+            // TargetFramework=net20 does not support System.Linq
+            // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
+            List<IntPtr> list = new List<IntPtr>();
+            foreach (OnlineStream s in streams)
+            {
+                list.Add(s.Handle);
+            }
+
+            IntPtr[] ptrs = list.ToArray();
+            Decode(_handle.Handle, ptrs, ptrs.Length);
+        }
+
+        public KeywordResult GetResult(OnlineStream stream)
+        {
+            IntPtr h = GetResult(_handle.Handle, stream.Handle);
+            KeywordResult result = new KeywordResult(h);
+            DestroyResult(h);
+            return result;
+        }
+
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        ~KeywordSpotter()
+        {
+            Cleanup();
+        }
+
+        private void Cleanup()
+        {
+            DestroyKeywordSpotter(_handle.Handle);
+
+            // Don't permit the handle to be used again.
+ _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateKeywordSpotter(ref KeywordSpotterConfig config); + + [DllImport(Dll.Filename)] + private static extern void DestroyKeywordSpotter(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateKeywordStream(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateKeywordStreamWithKeywords(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Keywords); + + [DllImport(Dll.Filename, EntryPoint = "IsKeywordStreamReady")] + private static extern int IsReady(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeKeywordStream")] + private static extern void Decode(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleKeywordStreams")] + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); + + [DllImport(Dll.Filename, EntryPoint = "GetKeywordResult")] + private static extern IntPtr GetResult(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DestroyKeywordResult")] + private static extern void DestroyResult(IntPtr result); + } +} diff --git a/scripts/dotnet/KeywordSpotterConfig.cs b/scripts/dotnet/KeywordSpotterConfig.cs new file mode 100644 index 00000000..125afb71 --- /dev/null +++ b/scripts/dotnet/KeywordSpotterConfig.cs @@ -0,0 +1,32 @@ +/// Copyright (c) 2024 Xiaomi Corporation + +using System.Runtime.InteropServices; + +namespace SherpaOnnx +{ + [StructLayout(LayoutKind.Sequential)] + public struct KeywordSpotterConfig + { + public KeywordSpotterConfig() + { + FeatConfig = new FeatureConfig(); + ModelConfig = new OnlineModelConfig(); + + MaxActivePaths = 4; + NumTrailingBlanks = 1; + KeywordsScore = 1.0F; + KeywordsThreshold = 0.25F; + KeywordsFile = ""; + } + public FeatureConfig FeatConfig; + public OnlineModelConfig ModelConfig; + 
+ public int MaxActivePaths; + public int NumTrailingBlanks; + public float KeywordsScore; + public float KeywordsThreshold; + + [MarshalAs(UnmanagedType.LPStr)] + public string KeywordsFile; + } +} diff --git a/sherpa-onnx/python/tests/test_keyword_spotter.py b/sherpa-onnx/python/tests/test_keyword_spotter.py index bdefa5d1..f4d79830 100755 --- a/sherpa-onnx/python/tests/test_keyword_spotter.py +++ b/sherpa-onnx/python/tests/test_keyword_spotter.py @@ -50,12 +50,12 @@ class TestKeywordSpotter(unittest.TestCase): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" else: - encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" + joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" tokens = ( f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/tokens.txt" @@ -109,12 +109,12 @@ class TestKeywordSpotter(unittest.TestCase): for use_int8 in [True, False]: if use_int8: encoder = 
f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" else: - encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" + joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" tokens = ( f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt" diff --git a/sherpa-onnx/python/tests/test_offline_recognizer.py b/sherpa-onnx/python/tests/test_offline_recognizer.py index 159fb47c..32f702c3 100755 --- a/sherpa-onnx/python/tests/test_offline_recognizer.py +++ b/sherpa-onnx/python/tests/test_offline_recognizer.py @@ -52,7 +52,7 @@ class TestOfflineRecognizer(unittest.TestCase): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx" 
else: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx" @@ -85,7 +85,7 @@ class TestOfflineRecognizer(unittest.TestCase): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx" diff --git a/sherpa-onnx/python/tests/test_online_recognizer.py b/sherpa-onnx/python/tests/test_online_recognizer.py index 9193fb0f..5319f41e 100755 --- a/sherpa-onnx/python/tests/test_online_recognizer.py +++ b/sherpa-onnx/python/tests/test_online_recognizer.py @@ -50,7 +50,7 @@ class TestOnlineRecognizer(unittest.TestCase): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx" @@ -90,7 +90,7 @@ class TestOnlineRecognizer(unittest.TestCase): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx" joiner = 
f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"