Add microphone example for .Net keyword spotting (#1120)
This commit is contained in:
127
dotnet-examples/keyword-spotting-from-microphone/Program.cs
Normal file
127
dotnet-examples/keyword-spotting-from-microphone/Program.cs
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
// This file shows how to do keyword spotting from a microphone with sherpa-onnx.
|
||||||
|
//
|
||||||
|
// 1. Download a model from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
|
||||||
|
//
|
||||||
|
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
|
||||||
|
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
|
||||||
|
//
|
||||||
|
// 2. Now run it
|
||||||
|
//
|
||||||
|
// dotnet run
|
||||||
|
|
||||||
|
using SherpaOnnx;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System;
|
||||||
|
|
||||||
|
using PortAudioSharp;
|
||||||
|
|
||||||
|
/// <summary>
/// Streams audio from the default input device into a sherpa-onnx keyword
/// spotter and prints every keyword it reports, until Ctrl+C is pressed.
/// </summary>
class KeywordSpotterDemo
{
    /// <summary>
    /// Program entry point. Configures the keyword spotter, opens the default
    /// PortAudio input device and decodes incoming audio in a polling loop.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        const string modelDir = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";

        var config = new KeywordSpotterConfig();
        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;

        config.ModelConfig.Transducer.Encoder = modelDir + "/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Decoder = modelDir + "/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Joiner = modelDir + "/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

        config.ModelConfig.Tokens = modelDir + "/tokens.txt";
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 1;
        config.KeywordsFile = modelDir + "/test_wavs/test_keywords.txt";

        var kws = new KeywordSpotter(config);

        Console.WriteLine("----------Use pre-defined keywords----------");

        OnlineStream s = kws.CreateStream();

        Console.WriteLine(PortAudio.VersionInfo.versionText);
        PortAudio.Initialize();

        try
        {
            Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
            for (int i = 0; i != PortAudio.DeviceCount; ++i)
            {
                Console.WriteLine($" Device {i}");
                DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
                Console.WriteLine($" Name: {deviceInfo.name}");
                Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
                Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
            }

            int deviceIndex = PortAudio.DefaultInputDevice;
            if (deviceIndex == PortAudio.NoDevice)
            {
                Console.WriteLine("No default input device found");

                // Return instead of Environment.Exit so the finally block
                // below still terminates PortAudio.
                Environment.ExitCode = 1;
                return;
            }

            DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);

            Console.WriteLine();
            Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

            StreamParameters param = new StreamParameters();
            param.device = deviceIndex;
            param.channelCount = 1;
            param.sampleFormat = SampleFormat.Float32;
            param.suggestedLatency = info.defaultLowInputLatency;
            param.hostApiSpecificStreamInfo = IntPtr.Zero;

            // NOTE(review): this callback feeds the same OnlineStream that the
            // main loop decodes; presumably it runs on a PortAudio-owned thread.
            // Confirm that OnlineStream tolerates concurrent
            // AcceptWaveform/Decode calls.
            PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
                UInt32 frameCount,
                ref StreamCallbackTimeInfo timeInfo,
                StreamCallbackFlags statusFlags,
                IntPtr userData
                ) =>
            {
                // Marshal.Copy throws on a null source pointer, and an exception
                // escaping a native callback would take the process down.
                if (input == IntPtr.Zero)
                {
                    return StreamCallbackResult.Continue;
                }

                // One channel of Float32 samples: frameCount floats per buffer.
                float[] samples = new float[frameCount];
                Marshal.Copy(input, samples, 0, (Int32)frameCount);

                s.AcceptWaveform(config.FeatConfig.SampleRate, samples);

                return StreamCallbackResult.Continue;
            };

            using var stream = new PortAudioSharp.Stream(inParams: param, outParams: null,
                sampleRate: config.FeatConfig.SampleRate,
                framesPerBuffer: 0,
                streamFlags: StreamFlags.ClipOff,
                callback: callback,
                userData: IntPtr.Zero
                );

            // Let Ctrl+C leave the loop so the stream and PortAudio are shut
            // down cleanly instead of the process being killed mid-capture.
            using var cts = new CancellationTokenSource();
            ConsoleCancelEventHandler onCancel = (_, e) =>
            {
                e.Cancel = true;
                cts.Cancel();
            };
            Console.CancelKeyPress += onCancel;

            Console.WriteLine(param);
            Console.WriteLine("Started! Please speak");

            stream.Start();
            try
            {
                while (!cts.IsCancellationRequested)
                {
                    while (kws.IsReady(s))
                    {
                        kws.Decode(s);

                        // Check after every decode step so a keyword found in
                        // the middle of a batch is not missed.
                        var result = kws.GetResult(s);
                        if (!string.IsNullOrEmpty(result.Keyword))
                        {
                            Console.WriteLine("Detected: {0}", result.Keyword);
                        }
                    }

                    // Poll every 200 ms, waking early when Ctrl+C is pressed.
                    cts.Token.WaitHandle.WaitOne(200);
                }
            }
            finally
            {
                // Unregister first: the handler must not touch cts once it is disposed.
                Console.CancelKeyPress -= onCancel;
                stream.Stop();
            }
        }
        finally
        {
            PortAudio.Terminate();
        }
    }
}
|
||||||
|
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<RootNamespace>keyword_spotting_from_microphone</RootNamespace>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="PortAudioSharp2" Version="*" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Common\Common.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
11
dotnet-examples/keyword-spotting-from-microphone/run.sh
Executable file
11
dotnet-examples/keyword-spotting-from-microphone/run.sh
Executable file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Downloads the keyword-spotting model on first use, then builds and runs the
# microphone example in Release configuration.

set -ex

# The model paths and `dotnet run` are relative to this project folder, so
# switch to it regardless of where the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  # -f: fail on HTTP errors instead of saving the error page as the archive.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

dotnet run -c Release
|
||||||
@@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -81,6 +83,10 @@ Global
|
|||||||
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
|
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|||||||
Reference in New Issue
Block a user