diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
index 70dc4fb7..1843cdf4 100755
--- a/.github/scripts/test-dot-net.sh
+++ b/.github/scripts/test-dot-net.sh
@@ -2,7 +2,10 @@
cd dotnet-examples/
-cd offline-punctuation
+cd vad-non-streaming-asr-paraformer
+./run.sh
+
+cd ../offline-punctuation
./run.sh
cd ../speaker-identification
diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml
index 500d9e02..8f7c9973 100644
--- a/.github/workflows/test-dot-net.yaml
+++ b/.github/workflows/test-dot-net.yaml
@@ -67,7 +67,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Release \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
- -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ -DSHERPA_ONNX_ENABLE_BINARY=ON \
..
cmake --build . --target install --config Release
@@ -197,6 +197,7 @@ jobs:
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
cp -v scripts/dotnet/examples/offline-punctuation.csproj dotnet-examples/offline-punctuation
+ cp -v scripts/dotnet/examples/vad-non-streaming-asr-paraformer.csproj dotnet-examples/vad-non-streaming-asr-paraformer
ls -lh /tmp
diff --git a/dotnet-examples/offline-punctuation/Program.cs b/dotnet-examples/offline-punctuation/Program.cs
index 83a54fea..d20ff105 100644
--- a/dotnet-examples/offline-punctuation/Program.cs
+++ b/dotnet-examples/offline-punctuation/Program.cs
@@ -17,7 +17,6 @@ using System;
class OfflinePunctuationDemo
{
-
static void Main(string[] args)
{
var config = new OfflinePunctuationConfig();
@@ -42,4 +41,3 @@ class OfflinePunctuationDemo
}
}
}
-
diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln
index fae0af92..c2685180 100644
--- a/dotnet-examples/sherpa-onnx.sln
+++ b/dotnet-examples/sherpa-onnx.sln
@@ -21,6 +21,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "s
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -66,5 +68,9 @@ Global
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
+ {8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
new file mode 100644
index 00000000..8471c024
--- /dev/null
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
@@ -0,0 +1,67 @@
+// Copyright (c) 2024 Xiaomi Corporation
+//
+// This file shows how to use a silero_vad model with a non-streaming Paraformer
+// for speech recognition.
+using SherpaOnnx;
+using System.Collections.Generic;
+using System;
+
+class VadNonStreamingAsrParaformer
+{
+  // Feeds a wave file to the VAD one window at a time and prints the
+  // Paraformer transcript of every speech segment with its time span.
+  static void Main(string[] args)
+  {
+    // please download model files from
+    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+    OfflineRecognizerConfig config = new OfflineRecognizerConfig();
+    config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
+    config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
+    config.ModelConfig.Debug = 0;
+    OfflineRecognizer recognizer = new OfflineRecognizer(config);
+
+    VadModelConfig vadModelConfig = new VadModelConfig();
+    vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
+    vadModelConfig.Debug = 0;
+
+    // The second argument is the detector's buffer size in seconds.
+    VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);
+
+    string testWaveFilename = "./lei-jun-test.wav";
+    WaveReader reader = new WaveReader(testWaveFilename);
+
+    int numSamples = reader.Samples.Length;
+    int windowSize = vadModelConfig.SileroVad.WindowSize;
+    int sampleRate = vadModelConfig.SampleRate;
+    // Only whole windows are fed; a trailing partial window is dropped.
+    int numIter = numSamples / windowSize;
+
+    for (int i = 0; i != numIter; ++i) {
+      int start = i * windowSize;
+      float[] samples = new float[windowSize];
+      Array.Copy(reader.Samples, start, samples, 0, windowSize);
+      vad.AcceptWaveform(samples);
+
+      // Drain every queued segment right away. Gating this on
+      // IsSpeechDetected() would leave segments that completed after the
+      // last detected speech in the queue forever.
+      while (!vad.IsEmpty()) {
+        SpeechSegment segment = vad.Front();
+        float startTime = segment.Start / (float)sampleRate;
+        float duration = segment.Samples.Length / (float)sampleRate;
+
+        OfflineStream stream = recognizer.CreateStream();
+        stream.AcceptWaveform(sampleRate, segment.Samples);
+        recognizer.Decode(stream);
+        String text = stream.Result.Text;
+
+        if (!String.IsNullOrEmpty(text)) {
+          Console.WriteLine("{0:0.00}--{1:0.00}: {2}", startTime, startTime + duration, text);
+        }
+
+        vad.Pop();
+      }
+    }
+  }
+}
+
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/WaveReader.cs b/dotnet-examples/vad-non-streaming-asr-paraformer/WaveReader.cs
new file mode 120000
index 00000000..bedfc634
--- /dev/null
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/WaveReader.cs
@@ -0,0 +1 @@
+../online-decode-files/WaveReader.cs
\ No newline at end of file
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/run.sh b/dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
new file mode 100755
index 00000000..cb8ca87f
--- /dev/null
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Download the VAD model, the test wave, and the Paraformer model if they are
+# missing, then run this example with `dotnet run`.
+
+set -ex
+
+release_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
+model_dir=sherpa-onnx-paraformer-zh-2023-03-28
+
+# Single-file assets are fetched only when absent from the current directory.
+for asset in silero_vad.onnx lei-jun-test.wav; do
+  [ -f ./$asset ] || curl -SL -O $release_url/$asset
+done
+
+# The presence of tokens.txt is the "already unpacked" check for the archive.
+if [ ! -f ./$model_dir/tokens.txt ]; then
+  curl -SL -O $release_url/$model_dir.tar.bz2
+
+  tar xvf $model_dir.tar.bz2
+  rm $model_dir.tar.bz2
+fi
+
+dotnet run
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
new file mode 100644
index 00000000..3a957bcf
--- /dev/null
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net6.0
+ vad_non_streaming_asr_paraformer
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/java-api-examples/VadNonStreamingParaformer.java b/java-api-examples/VadNonStreamingParaformer.java
index 48e446ae..61c2b53d 100644
--- a/java-api-examples/VadNonStreamingParaformer.java
+++ b/java-api-examples/VadNonStreamingParaformer.java
@@ -39,10 +39,6 @@ public class VadNonStreamingParaformer {
String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
- String waveFilename = "./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav";
-
- WaveReader reader = new WaveReader(waveFilename);
-
OfflineParaformerModelConfig paraformer =
OfflineParaformerModelConfig.builder().setModel(model).build();
diff --git a/scripts/dotnet/CircularBuffer.cs b/scripts/dotnet/CircularBuffer.cs
new file mode 100644
index 00000000..9a507123
--- /dev/null
+++ b/scripts/dotnet/CircularBuffer.cs
@@ -0,0 +1,143 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Managed wrapper around a native sherpa-onnx circular buffer of floats.
+    /// Every member forwards to the native library through the handle
+    /// created in the constructor.
+    /// </summary>
+    public class CircularBuffer : IDisposable
+    {
+        /// <summary>Creates the native buffer.</summary>
+        /// <param name="capacity">Capacity forwarded to the native constructor.</param>
+        public CircularBuffer(int capacity)
+        {
+            IntPtr h = SherpaOnnxCreateCircularBuffer(capacity);
+            _handle = new HandleRef(this, h);
+        }
+
+        /// <summary>Pushes every element of <paramref name="data"/> into the buffer.</summary>
+        public void Push(float[] data)
+        {
+            SherpaOnnxCircularBufferPush(_handle.Handle, data, data.Length);
+        }
+
+        /// <summary>
+        /// Returns a managed copy of <paramref name="n"/> elements starting at
+        /// <paramref name="startIndex"/>.
+        /// </summary>
+        public float[] Get(int startIndex, int n)
+        {
+            IntPtr p = SherpaOnnxCircularBufferGet(_handle.Handle, startIndex, n);
+
+            try
+            {
+                float[] ans = new float[n];
+                Marshal.Copy(p, ans, 0, n);
+                return ans;
+            }
+            finally
+            {
+                // Free the native copy even if allocating or copying throws.
+                if (p != IntPtr.Zero)
+                {
+                    SherpaOnnxCircularBufferFree(p);
+                }
+            }
+        }
+
+        /// <summary>Forwards <paramref name="n"/> to the native Pop call.</summary>
+        public void Pop(int n)
+        {
+            SherpaOnnxCircularBufferPop(_handle.Handle, n);
+        }
+
+        /// <summary>Value reported by the native Size call.</summary>
+        public int Size
+        {
+            get
+            {
+                return SherpaOnnxCircularBufferSize(_handle.Handle);
+            }
+        }
+
+        /// <summary>Value reported by the native Head call.</summary>
+        public int Head
+        {
+            get
+            {
+                return SherpaOnnxCircularBufferHead(_handle.Handle);
+            }
+        }
+
+        /// <summary>Forwards to the native Reset call.</summary>
+        public void Reset()
+        {
+            SherpaOnnxCircularBufferReset(_handle.Handle);
+        }
+
+        /// <summary>Destroys the native buffer; safe to call more than once.</summary>
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        ~CircularBuffer()
+        {
+            Cleanup();
+        }
+
+        private void Cleanup()
+        {
+            // Already destroyed (e.g. a second Dispose()): nothing to release.
+            if (_handle.Handle == IntPtr.Zero)
+            {
+                return;
+            }
+
+            SherpaOnnxDestroyCircularBuffer(_handle.Handle);
+
+            // Don't permit the handle to be used again.
+            _handle = new HandleRef(this, IntPtr.Zero);
+        }
+
+        private HandleRef _handle;
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxCreateCircularBuffer(int capacity);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroyCircularBuffer(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxCircularBufferPush(IntPtr handle, float[] p, int n);
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxCircularBufferGet(IntPtr handle, int startIndex, int n);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxCircularBufferFree(IntPtr p);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxCircularBufferPop(IntPtr handle, int n);
+
+        [DllImport(Dll.Filename)]
+        private static extern int SherpaOnnxCircularBufferSize(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern int SherpaOnnxCircularBufferHead(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxCircularBufferReset(IntPtr handle);
+    }
+}
diff --git a/scripts/dotnet/SileroVadModelConfig.cs b/scripts/dotnet/SileroVadModelConfig.cs
new file mode 100644
index 00000000..2b02672f
--- /dev/null
+++ b/scripts/dotnet/SileroVadModelConfig.cs
@@ -0,0 +1,44 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Silero VAD settings with a sequential layout. Field order is part of
+    /// that layout and must not be changed.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct SileroVadModelConfig
+    {
+        /// <summary>Marshalled as an LPStr string. Defaults to "".</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Model;
+
+        /// <summary>Defaults to 0.5.</summary>
+        public float Threshold;
+
+        /// <summary>Defaults to 0.5.</summary>
+        public float MinSilenceDuration;
+
+        /// <summary>Defaults to 0.25.</summary>
+        public float MinSpeechDuration;
+
+        /// <summary>Defaults to 512.</summary>
+        public int WindowSize;
+
+        /// <summary>Initializes every field to its default value.</summary>
+        public SileroVadModelConfig()
+        {
+            WindowSize = 512;
+            MinSpeechDuration = 0.25F;
+            MinSilenceDuration = 0.5F;
+            Threshold = 0.5F;
+            Model = "";
+        }
+    }
+}
diff --git a/scripts/dotnet/SpeechSegment.cs b/scripts/dotnet/SpeechSegment.cs
new file mode 100644
index 00000000..1128e705
--- /dev/null
+++ b/scripts/dotnet/SpeechSegment.cs
@@ -0,0 +1,58 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Managed copy of a native speech segment. The start value and the
+    /// samples are copied out of native memory in the constructor, so the
+    /// instance does not reference the native pointer afterwards.
+    /// </summary>
+    public class SpeechSegment
+    {
+        /// <summary>Copies the segment that <paramref name="handle"/> points to.</summary>
+        /// <param name="handle">Pointer to a native struct laid out like <c>Impl</c>.</param>
+        public SpeechSegment(IntPtr handle)
+        {
+            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
+
+            _start = impl.Start;
+            _samples = new float[impl.Count];
+
+            // Marshal.Copy replaces the hand-written unsafe loop. It rejects a
+            // null source pointer, so an empty segment skips the copy.
+            if (impl.Count > 0)
+            {
+                Marshal.Copy(impl.Samples, _samples, 0, impl.Count);
+            }
+        }
+
+        // NOTE(review): public although named like a private field; left
+        // public so existing callers keep compiling. Prefer Start.
+        public int _start;
+
+        /// <summary>
+        /// Start value copied from the native segment; presumably a sample index.
+        /// </summary>
+        public int Start => _start;
+
+        private float[] _samples;
+
+        /// <summary>Samples copied from the native segment.</summary>
+        public float[] Samples => _samples;
+
+        // Read from the native pointer; field order defines the layout.
+        [StructLayout(LayoutKind.Sequential)]
+        struct Impl
+        {
+            public int Start;
+            public IntPtr Samples;
+            public int Count;
+        }
+    }
+}
diff --git a/scripts/dotnet/VadModelConfig.cs b/scripts/dotnet/VadModelConfig.cs
new file mode 100644
index 00000000..87fca71d
--- /dev/null
+++ b/scripts/dotnet/VadModelConfig.cs
@@ -0,0 +1,45 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// VAD settings with a sequential layout. Field order is part of that
+    /// layout and must not be changed.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct VadModelConfig
+    {
+        /// <summary>Silero VAD settings; default-constructed.</summary>
+        public SileroVadModelConfig SileroVad;
+
+        /// <summary>Defaults to 16000.</summary>
+        public int SampleRate;
+
+        /// <summary>Defaults to 1.</summary>
+        public int NumThreads;
+
+        /// <summary>Marshalled as an LPStr string. Defaults to "cpu".</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Provider;
+
+        /// <summary>Defaults to 0.</summary>
+        public int Debug;
+
+        /// <summary>Initializes every field to its default value.</summary>
+        public VadModelConfig()
+        {
+            Debug = 0;
+            Provider = "cpu";
+            NumThreads = 1;
+            SampleRate = 16000;
+            SileroVad = new SileroVadModelConfig();
+        }
+    }
+}
+
diff --git a/scripts/dotnet/VoiceActivityDetector.cs b/scripts/dotnet/VoiceActivityDetector.cs
new file mode 100644
index 00000000..44ecc2aa
--- /dev/null
+++ b/scripts/dotnet/VoiceActivityDetector.cs
@@ -0,0 +1,145 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Managed wrapper around the native sherpa-onnx voice activity detector.
+    /// Every member forwards to the native library through the handle
+    /// created in the constructor.
+    /// </summary>
+    public class VoiceActivityDetector : IDisposable
+    {
+        /// <summary>Creates the native detector.</summary>
+        /// <param name="config">Configuration passed by reference to native code.</param>
+        /// <param name="bufferSizeInSeconds">Forwarded unchanged to native code.</param>
+        public VoiceActivityDetector(VadModelConfig config, float bufferSizeInSeconds)
+        {
+            IntPtr h = SherpaOnnxCreateVoiceActivityDetector(ref config, bufferSizeInSeconds);
+            _handle = new HandleRef(this, h);
+        }
+
+        /// <summary>Passes all of <paramref name="samples"/> to the native detector.</summary>
+        public void AcceptWaveform(float[] samples)
+        {
+            SherpaOnnxVoiceActivityDetectorAcceptWaveform(_handle.Handle, samples, samples.Length);
+        }
+
+        /// <summary>True when the native Empty call returns 1.</summary>
+        public bool IsEmpty()
+        {
+            return SherpaOnnxVoiceActivityDetectorEmpty(_handle.Handle) == 1;
+        }
+
+        /// <summary>True when the native Detected call returns 1.</summary>
+        public bool IsSpeechDetected()
+        {
+            return SherpaOnnxVoiceActivityDetectorDetected(_handle.Handle) == 1;
+        }
+
+        /// <summary>Forwards to the native Pop call.</summary>
+        public void Pop()
+        {
+            SherpaOnnxVoiceActivityDetectorPop(_handle.Handle);
+        }
+
+        /// <summary>
+        /// Returns a managed copy of the segment from the native Front call.
+        /// </summary>
+        public SpeechSegment Front()
+        {
+            IntPtr p = SherpaOnnxVoiceActivityDetectorFront(_handle.Handle);
+
+            try
+            {
+                return new SpeechSegment(p);
+            }
+            finally
+            {
+                // Destroy the native segment even if copying it throws.
+                if (p != IntPtr.Zero)
+                {
+                    SherpaOnnxDestroySpeechSegment(p);
+                }
+            }
+        }
+
+        /// <summary>Forwards to the native Clear call.</summary>
+        public void Clear()
+        {
+            SherpaOnnxVoiceActivityDetectorClear(_handle.Handle);
+        }
+
+        /// <summary>Forwards to the native Reset call.</summary>
+        public void Reset()
+        {
+            SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
+        }
+
+        /// <summary>Destroys the native detector; safe to call more than once.</summary>
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        ~VoiceActivityDetector()
+        {
+            Cleanup();
+        }
+
+        private void Cleanup()
+        {
+            // Already destroyed (e.g. a second Dispose()): nothing to release.
+            if (_handle.Handle == IntPtr.Zero)
+            {
+                return;
+            }
+
+            SherpaOnnxDestroyVoiceActivityDetector(_handle.Handle);
+
+            // Don't permit the handle to be used again.
+            _handle = new HandleRef(this, IntPtr.Zero);
+        }
+
+        private HandleRef _handle;
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxCreateVoiceActivityDetector(ref VadModelConfig config, float bufferSizeInSeconds);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroyVoiceActivityDetector(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxVoiceActivityDetectorAcceptWaveform(IntPtr handle, float[] samples, int n);
+
+        [DllImport(Dll.Filename)]
+        private static extern int SherpaOnnxVoiceActivityDetectorEmpty(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern int SherpaOnnxVoiceActivityDetectorDetected(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxVoiceActivityDetectorPop(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxVoiceActivityDetectorClear(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxVoiceActivityDetectorFront(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroySpeechSegment(IntPtr segment);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
+
+    }
+}
+
diff --git a/scripts/dotnet/examples/vad-non-streaming-asr-paraformer.csproj b/scripts/dotnet/examples/vad-non-streaming-asr-paraformer.csproj
new file mode 100644
index 00000000..4870735f
--- /dev/null
+++ b/scripts/dotnet/examples/vad-non-streaming-asr-paraformer.csproj
@@ -0,0 +1,19 @@
+
+
+
+ Exe
+ net6.0
+ vad_non_streaming_asr_paraformer
+ enable
+ enable
+
+
+
+ /tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json
+
+
+
+
+
+
+
diff --git a/scripts/dotnet/sherpa-onnx.csproj.in b/scripts/dotnet/sherpa-onnx.csproj.in
index a6f83a64..60ae4187 100644
--- a/scripts/dotnet/sherpa-onnx.csproj.in
+++ b/scripts/dotnet/sherpa-onnx.csproj.in
@@ -4,7 +4,7 @@
README.md
Library
10.0
- netstandard2.0;netcoreapp3.1;net6.0;net7.0
+ netstandard2.0
linux-x64;osx-x64;win-x64
true
sherpa-onnx
diff --git a/scripts/dotnet/sherpa-onnx.csproj.runtime.in b/scripts/dotnet/sherpa-onnx.csproj.runtime.in
index 335254e1..f90ae2c3 100644
--- a/scripts/dotnet/sherpa-onnx.csproj.runtime.in
+++ b/scripts/dotnet/sherpa-onnx.csproj.runtime.in
@@ -3,7 +3,7 @@
Apache-2.0
README.md
Library
- netstandard2.0;netcoreapp3.1;net6.0;net7.0
+ netstandard2.0
{{ dotnet_rid }}
sherpa-onnx
{{ version }}