Pascal API for VAD (#1249)

This commit is contained in:
Fangjun Kuang
2024-08-13 16:16:51 +08:00
committed by GitHub
parent a7dc6c2c16
commit 619279b162
24 changed files with 1199 additions and 14 deletions

3
pascal-api-examples/vad/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
!run*.sh
circular_buffer
remove_silence

View File

@@ -0,0 +1,11 @@
# Introduction
This directory contains examples of how to use the VAD (voice activity detection)
APIs.
|Script| Description|
|---------|------------|
|[run-circular-buffer.sh](./run-circular-buffer.sh)|It shows how to use the circular buffer API.|
|[run-remove-silence.sh](./run-remove-silence.sh)|It shows how to use the VAD API to remove silences from a wave file.|

View File

@@ -0,0 +1,106 @@
{ Copyright (c) 2024 Xiaomi Corporation }
program circular_buffer;
{
This file shows how to use the CircularBuffer API of sherpa-onnx.

It pushes samples into a TSherpaOnnxCircularBuffer, reads them back with
Get(), removes them with Pop() and checks Size/Head after every step with
assertions. The buffer object is released at the end, even if one of the
steps raises an exception.
}

{$mode objfpc}

{$ASSERTIONS ON}

uses
  sherpa_onnx;

var
  Buffer: TSherpaOnnxCircularBuffer;
  Samples: TSherpaOnnxSamplesArray;
begin
  {The initial capacity is 5. It will be resized automatically if needed.}
  Buffer := TSherpaOnnxCircularBuffer.Create(5);
  try
    Assert(Buffer.Size = 0);
    Assert(Buffer.Head = 0);

    Buffer.Push([0, 10, 20]);
    {Push() changes Size. Head is not changed.}
    Assert(Buffer.Size = 3);
    Assert(Buffer.Head = 0);

    Samples := Buffer.Get(0, 1);
    Assert(Length(Samples) = 1);
    Assert(Samples[0] = 0);

    {Get() does not change Size or Head.}
    Assert(Buffer.Size = 3);
    Assert(Buffer.Head = 0);

    Samples := Buffer.Get(0, 2);
    Assert(Length(Samples) = 2);
    Assert(Samples[0] = 0);
    Assert(Samples[1] = 10);

    { The buffer will be resized since its initial capacity is 5 but we have
      pushed 7 elements into it.
      No data is lost during the resize.
    }
    Buffer.Push([30, 40, 50, 60]);
    Assert(Buffer.Size = 7); {There are now 7 elements}
    Assert(Buffer.Head = 0);

    {Remove the first 4 elements. Pop() advances Head and shrinks Size.}
    Buffer.Pop(4);
    Assert(Buffer.Size = 3); {There are only 3 elements left}
    Assert(Buffer.Head = 4);

    Samples := Buffer.Get(Buffer.Head, 2);
    Assert(Length(Samples) = 2);
    Assert(Samples[0] = 40);
    Assert(Samples[1] = 50);

    Buffer.Pop(1);
    Assert(Buffer.Size = 2); {There are only 2 elements left}
    Assert(Buffer.Head = 5);

    Samples := Buffer.Get(Buffer.Head, 2);
    Assert(Length(Samples) = 2);
    Assert(Samples[0] = 50);
    Assert(Samples[1] = 60);

    Buffer.Pop(2);
    Assert(Buffer.Size = 0); {There are no elements left}
    Assert(Buffer.Head = 7);

    Buffer.Push([100, 200, 300, 400, 500]);
    Assert(Buffer.Size = 5);
    Assert(Buffer.Head = 7);

    Buffer.Pop(4);
    Assert(Buffer.Size = 1);

    {Head can be larger than the Capacity!
     This is what circular means: Head is a running index that keeps growing,
     while the position in the underlying storage wraps around
     (presumably Buffer.Head mod Capacity -- confirm against the C++ code).
    }
    Assert(Buffer.Head = 11);

    Buffer.Push([600, 700]);
    Assert(Buffer.Size = 3);
    Assert(Buffer.Head = 11);

    Samples := Buffer.Get(Buffer.Head, 3);
    Assert(Length(Samples) = 3);
    Assert(Samples[0] = 500);
    Assert(Samples[1] = 600);
    Assert(Samples[2] = 700);

    Buffer.Pop(3);
    Assert(Buffer.Size = 0);
    Assert(Buffer.Head = 14);

    {Reset() empties the buffer and moves Head back to 0.}
    Buffer.Reset();
    Assert(Buffer.Size = 0);
    Assert(Buffer.Head = 0);
  finally
    {Release the buffer object; the original example never freed it.}
    Buffer.Free;
  end;
end.

View File

@@ -0,0 +1,115 @@
{ Copyright (c) 2024 Xiaomi Corporation }
{
This file shows how to use the VAD API from sherpa-onnx
to remove silences from a wave file.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

type
  TSpeechSegmentArray = array of TSherpaOnnxSpeechSegment;

{ Removes every speech segment currently queued in Detector, appends it to
  Segments and prints its start and end time in seconds.

  Detector: the voice activity detector to drain.
  Segments: receives the drained segments, in the order they were queued.
  Rate:     sample rate in Hz, used to convert sample counts to seconds. }
procedure DrainSegments(Detector: TSherpaOnnxVoiceActivityDetector;
  var Segments: TSpeechSegmentArray; Rate: Integer);
var
  Segment: TSherpaOnnxSpeechSegment;
  Start: Single;
  Duration: Single;
begin
  while not Detector.IsEmpty do
  begin
    Segment := Detector.Front();
    Detector.Pop();

    SetLength(Segments, Length(Segments) + 1);
    Segments[High(Segments)] := Segment;

    Start := Segment.Start / Rate;
    Duration := Length(Segment.Samples) / Rate;
    WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
  end;
end;

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxVadModelConfig;
  Vad: TSherpaOnnxVoiceActivityDetector;
  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;
  SampleRate: Integer;
  AllSpeechSegment: TSpeechSegmentArray;
  AllSamples: array of Single;
  N: Integer;
  I: Integer;
begin
  SampleRate := 16000; {Please don't change it unless you know the details}

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> SampleRate then
  begin
    WriteLn(Format('Expected sample rate: %d. Given: %d',
      [SampleRate, Wave.SampleRate]));
    {Report the failure through the exit status so that calling scripts
     (e.g. one using "set -e") notice it.}
    Halt(1);
  end;

  WindowSize := 512; {Please don't change it unless you know the details}

  Initialize(Config);
  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.25;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads := 1;
  Config.Debug := True;
  Config.Provider := 'cpu';
  Config.SampleRate := SampleRate;

  AllSpeechSegment := nil;
  AllSamples := nil;

  Vad := TSherpaOnnxVoiceActivityDetector.Create(Config, 20);
  try
    {Feed the wave to the detector one full window at a time. A trailing
     chunk shorter than WindowSize is not fed.}
    Offset := 0;
    while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Inc(Offset, WindowSize);
      DrainSegments(Vad, AllSpeechSegment, SampleRate);
    end;

    {Flush the detector, then collect whatever it still has queued.}
    Vad.Flush;
    DrainSegments(Vad, AllSpeechSegment, SampleRate);

    {First pass: total number of speech samples, so that the output array
     is allocated only once.}
    N := 0;
    for SpeechSegment in AllSpeechSegment do
      Inc(N, Length(SpeechSegment.Samples));

    SetLength(AllSamples, N);

    {Second pass: concatenate the samples of all segments.}
    N := 0;
    for SpeechSegment in AllSpeechSegment do
    begin
      for I := Low(SpeechSegment.Samples) to High(SpeechSegment.Samples) do
      begin
        AllSamples[N] := SpeechSegment.Samples[I];
        Inc(N);
      end;
    end;

    SherpaOnnxWriteWave('./lei-jun-test-no-silence.wav', AllSamples, SampleRate);
    WriteLn('Saved to ./lei-jun-test-no-silence.wav');
  finally
    {Release the detector even if one of the steps above raised.}
    FreeAndNil(Vad);
  end;
end.

View File

@@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# compiles circular_buffer.pas with fpc and runs the resulting program.
#
# The script may be invoked from any directory: it switches to its own
# directory first and refers to the build tree through absolute paths.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)
LIB_DIR=$SHERPA_ONNX_DIR/build/install/lib

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The source file and the compiled binary live next to this script.
cd -- "$SCRIPT_DIR"

# Build and install the C API only when no shared library (macOS, Linux or
# Windows naming) is present yet.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

fpc \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./circular_buffer.pas

# Let the dynamic loader find the shared library. The ${VAR:+...} form avoids
# a trailing ':' (which would add the current directory to the search path)
# when the variable was previously unset or empty.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./circular_buffer

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads the silero VAD model and a test wave file if they are missing,
# compiles remove_silence.pas with fpc and runs the resulting program.
#
# The script may be invoked from any directory: it switches to its own
# directory first and refers to the build tree through absolute paths.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)
LIB_DIR=$SHERPA_ONNX_DIR/build/install/lib

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The source file, the downloaded assets and the compiled binary live next to
# this script.
cd -- "$SCRIPT_DIR"

# Build and install the C API only when no shared library (macOS, Linux or
# Windows naming) is present yet.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# -f makes curl fail on an HTTP error instead of saving the error page under
# the asset's name, where the existence checks below would accept it on the
# next run.
if [[ ! -f ./silero_vad.onnx ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

fpc \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./remove_silence.pas

# Let the dynamic loader find the shared library. The ${VAR:+...} form avoids
# a trailing ':' (which would add the current directory to the search path)
# when the variable was previously unset or empty.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./remove_silence