diff --git a/dotnet-examples/keyword-spotting-from-microphone/Program.cs b/dotnet-examples/keyword-spotting-from-microphone/Program.cs
new file mode 100644
index 00000000..cb0c922f
--- /dev/null
+++ b/dotnet-examples/keyword-spotting-from-microphone/Program.cs
@@ -0,0 +1,162 @@
+// Copyright (c) 2024 Xiaomi Corporation
+//
+// This file shows how to do keyword spotting from a microphone with sherpa-onnx.
+//
+// 1. Download a model from
+//    https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+//
+//    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+//    tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+//
+// 2. Now run it
+//
+//    dotnet run
+//
+// 3. Speak into the default input device. Press Ctrl+C to stop.
+
+using SherpaOnnx;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System;
+
+using PortAudioSharp;
+
+// Captures audio from the default PortAudio input device and prints every
+// keyword reported by the sherpa-onnx keyword spotter.
+class KeywordSpotterDemo
+{
+    // Directory created by extracting the model archive named in the header.
+    private const string ModelDir = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";
+
+    // Configures the spotter, opens the microphone and polls for detected
+    // keywords until Ctrl+C is pressed. The args parameter is not used.
+    static void Main(string[] args)
+    {
+        var config = new KeywordSpotterConfig();
+        config.FeatConfig.SampleRate = 16000;
+        config.FeatConfig.FeatureDim = 80;
+
+        config.ModelConfig.Transducer.Encoder = ModelDir + "/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
+        config.ModelConfig.Transducer.Decoder = ModelDir + "/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
+        config.ModelConfig.Transducer.Joiner = ModelDir + "/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
+
+        config.ModelConfig.Tokens = ModelDir + "/tokens.txt";
+        config.ModelConfig.Provider = "cpu";
+        config.ModelConfig.NumThreads = 1;
+        config.ModelConfig.Debug = 1;
+        config.KeywordsFile = ModelDir + "/test_wavs/test_keywords.txt";
+
+        var kws = new KeywordSpotter(config);
+
+        Console.WriteLine("----------Use pre-defined keywords----------");
+
+        OnlineStream s = kws.CreateStream();
+
+        Console.WriteLine(PortAudio.VersionInfo.versionText);
+        PortAudio.Initialize();
+
+        // From here on PortAudio is initialized; the finally block terminates it.
+        try
+        {
+            Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
+            for (int i = 0; i != PortAudio.DeviceCount; ++i)
+            {
+                Console.WriteLine($" Device {i}");
+                DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
+                Console.WriteLine($" Name: {deviceInfo.name}");
+                Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
+                Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
+            }
+
+            int deviceIndex = PortAudio.DefaultInputDevice;
+            if (deviceIndex == PortAudio.NoDevice)
+            {
+                Console.WriteLine("No default input device found");
+                // Return instead of calling Environment.Exit so the finally
+                // block still terminates PortAudio; the exit code stays 1.
+                Environment.ExitCode = 1;
+                return;
+            }
+
+            DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+
+            Console.WriteLine();
+            Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
+
+            StreamParameters param = new StreamParameters();
+            param.device = deviceIndex;
+            param.channelCount = 1;
+            param.sampleFormat = SampleFormat.Float32;
+            param.suggestedLatency = info.defaultLowInputLatency;
+            param.hostApiSpecificStreamInfo = IntPtr.Zero;
+
+            // Invoked by PortAudio for each captured buffer: copy the native
+            // samples into a managed array and feed them to the spotter stream.
+            PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
+                UInt32 frameCount,
+                ref StreamCallbackTimeInfo timeInfo,
+                StreamCallbackFlags statusFlags,
+                IntPtr userData
+                ) =>
+            {
+                float[] samples = new float[frameCount];
+                Marshal.Copy(input, samples, 0, (Int32)frameCount);
+
+                s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
+
+                return StreamCallbackResult.Continue;
+            };
+
+            PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
+                framesPerBuffer: 0,
+                streamFlags: StreamFlags.ClipOff,
+                callback: callback,
+                userData: IntPtr.Zero
+                );
+
+            // Ctrl+C requests a clean shutdown instead of killing the process,
+            // so the stream and PortAudio are released below.
+            var cts = new CancellationTokenSource();
+            Console.CancelKeyPress += (sender, e) =>
+            {
+                e.Cancel = true; // keep the process alive until cleanup has run
+                cts.Cancel();
+            };
+
+            Console.WriteLine(param);
+            Console.WriteLine("Started! Please speak");
+
+            try
+            {
+                stream.Start();
+
+                while (!cts.IsCancellationRequested)
+                {
+                    while (kws.IsReady(s))
+                    {
+                        kws.Decode(s);
+                    }
+
+                    var result = kws.GetResult(s);
+                    if (!string.IsNullOrEmpty(result.Keyword))
+                    {
+                        Console.WriteLine($"Detected: {result.Keyword}");
+                    }
+
+                    Thread.Sleep(200); // ms
+                }
+
+                stream.Stop();
+            }
+            finally
+            {
+                stream.Dispose();
+            }
+        }
+        finally
+        {
+            PortAudio.Terminate();
+        }
+    }
+}
+
diff --git a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
new file mode 100644
index 00000000..b3afae78
--- /dev/null
+++ b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
@@ -0,0 +1,19 @@
+
+
+
+ Exe
+ net6.0
+ keyword_spotting_from_microphone
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dotnet-examples/keyword-spotting-from-microphone/run.sh b/dotnet-examples/keyword-spotting-from-microphone/run.sh
new file mode 100755
index 00000000..1f07b9fa
--- /dev/null
+++ b/dotnet-examples/keyword-spotting-from-microphone/run.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! 
-f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +fi + +dotnet run -c Release diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln index b0d2e56c..fa754d8c 100644 --- a/dotnet-examples/sherpa-onnx.sln +++ b/dotnet-examples/sherpa-onnx.sln @@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -81,6 +83,10 @@ Global {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE