Add Dart API for VAD (#904)

This commit is contained in:
Fangjun Kuang
2024-05-22 21:56:21 +08:00
committed by GitHub
parent 81346d1172
commit 49ee458bfb
11 changed files with 598 additions and 14 deletions

View File

@@ -2,6 +2,47 @@
import 'dart:ffi';
import 'package:ffi/ffi.dart';
/// FFI mirror of the native Silero VAD model configuration struct.
///
/// The declaration order of the fields and their `@Float()` / `@Int32()`
/// annotations determine the native memory layout, so both must match the
/// C-side struct exactly.
final class SherpaOnnxSileroVadModelConfig extends Struct {
/// Pointer to a native UTF-8 string (presumably the model file path —
/// confirm against the C header).
external Pointer<Utf8> model;
/// 32-bit float; presumably the speech-probability threshold — TODO confirm.
@Float()
external double threshold;
/// 32-bit float; unit not visible here (presumably seconds — TODO confirm).
@Float()
external double minSilenceDuration;
/// 32-bit float; unit not visible here (presumably seconds — TODO confirm).
@Float()
external double minSpeechDuration;
/// 32-bit signed integer; presumably a window length in samples — TODO
/// confirm.
@Int32()
external int windowSize;
}
/// FFI mirror of the native top-level VAD configuration struct.
///
/// Field order and native-type annotations define the memory layout and must
/// match the C-side struct exactly.
final class SherpaOnnxVadModelConfig extends Struct {
/// Silero VAD settings, embedded by value (the field type is the struct
/// itself, not a pointer to it).
external SherpaOnnxSileroVadModelConfig sileroVad;
/// 32-bit signed integer sample rate (presumably in Hz — TODO confirm).
@Int32()
external int sampleRate;
/// 32-bit signed integer thread count.
@Int32()
external int numThreads;
/// Pointer to a native UTF-8 string naming the provider.
external Pointer<Utf8> provider;
/// 32-bit signed integer; presumably used as a boolean flag — TODO confirm.
@Int32()
external int debug;
}
/// FFI mirror of the native speech-segment struct.
///
/// Field order and native-type annotations define the memory layout and must
/// match the C-side struct exactly.
final class SherpaOnnxSpeechSegment extends Struct {
/// 32-bit signed integer; presumably the segment's starting sample index —
/// TODO confirm.
@Int32()
external int start;
/// Pointer to native 32-bit float data.
external Pointer<Float> samples;
/// 32-bit signed integer; presumably the number of floats behind [samples] —
/// TODO confirm.
@Int32()
external int n;
}
final class SherpaOnnxWave extends Struct {
external Pointer<Float> samples;
@@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
external Pointer<Utf8> provider;
}
// Opaque native handle types. Each extends `Opaque`, so Dart knows nothing
// about their layout and only ever refers to them through `Pointer<T>`.

/// Opaque handle type for a native circular buffer.
final class SherpaOnnxCircularBuffer extends Opaque {}
/// Opaque handle type for a native voice activity detector.
final class SherpaOnnxVoiceActivityDetector extends Opaque {}
/// Opaque handle type for a native online stream.
final class SherpaOnnxOnlineStream extends Opaque {}
/// Opaque handle type for a native speaker embedding extractor.
final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
/// Opaque handle type for a native speaker embedding manager.
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
// Voice activity detector signatures. Each C function gets a pair of typedefs:
// the `...Native` one spells the signature with dart:ffi native types (used
// inside `NativeFunction<>`), the other spells it with Dart types (the type of
// the callable stored on the bindings class).

/// Creates a detector from a config pointer and a 32-bit float argument
/// (presumably a buffer size in seconds — TODO confirm against the C header).
typedef SherpaOnnxCreateVoiceActivityDetectorNative
= Pointer<SherpaOnnxVoiceActivityDetector> Function(
Pointer<SherpaOnnxVadModelConfig>, Float);
typedef SherpaOnnxCreateVoiceActivityDetector
= Pointer<SherpaOnnxVoiceActivityDetector> Function(
Pointer<SherpaOnnxVadModelConfig>, double);
/// Destroys a detector given its handle; returns nothing.
typedef SherpaOnnxDestroyVoiceActivityDetectorNative = Void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxDestroyVoiceActivityDetector = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
/// Takes a detector handle, a pointer to float samples and a 32-bit integer
/// (presumably the sample count — TODO confirm); returns nothing.
typedef SherpaOnnxVoiceActivityDetectorAcceptWaveformNative = Void Function(
Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, Int32);
typedef SherpaOnnxVoiceActivityDetectorAcceptWaveform = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, int);
/// Returns a 32-bit integer for the given detector (presumably non-zero when
/// no segment is queued — TODO confirm).
typedef SherpaOnnxVoiceActivityDetectorEmptyNative = Int32 Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxVoiceActivityDetectorEmpty = int Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
/// Returns a 32-bit integer for the given detector (presumably non-zero when
/// speech is currently detected — TODO confirm).
typedef SherpaOnnxVoiceActivityDetectorDetectedNative = Int32 Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxVoiceActivityDetectorDetected = int Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
/// Takes a detector handle; returns nothing.
typedef SherpaOnnxVoiceActivityDetectorPopNative = Void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxVoiceActivityDetectorPop = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
/// Takes a detector handle; returns nothing.
typedef SherpaOnnxVoiceActivityDetectorClearNative = Void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxVoiceActivityDetectorClear = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
/// Takes a detector handle; returns nothing.
typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
/// Returns a pointer to a [SherpaOnnxSpeechSegment] for the given detector.
///
/// The signature contains only `Pointer` types, which are spelled the same on
/// the native and Dart sides, so the Dart typedef is a plain alias.
typedef SherpaOnnxVoiceActivityDetectorFrontNative
= Pointer<SherpaOnnxSpeechSegment> Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
typedef SherpaOnnxVoiceActivityDetectorFront
= SherpaOnnxVoiceActivityDetectorFrontNative;
/// Destroys a speech segment given its pointer; returns nothing.
typedef SherpaOnnxDestroySpeechSegmentNative = Void Function(
Pointer<SherpaOnnxSpeechSegment>);
typedef SherpaOnnxDestroySpeechSegment = void Function(
Pointer<SherpaOnnxSpeechSegment>);
// Circular buffer signatures. As elsewhere in this file, the `...Native`
// typedef uses dart:ffi native types and its sibling uses Dart types.

/// Creates a circular buffer from one 32-bit integer (presumably its capacity —
/// TODO confirm against the C header).
typedef SherpaOnnxCreateCircularBufferNative = Pointer<SherpaOnnxCircularBuffer>
Function(Int32);
typedef SherpaOnnxCreateCircularBuffer = Pointer<SherpaOnnxCircularBuffer>
Function(int);
/// Destroys a circular buffer given its handle; returns nothing.
typedef SherpaOnnxDestroyCircularBufferNative = Void Function(
Pointer<SherpaOnnxCircularBuffer>);
typedef SherpaOnnxDestroyCircularBuffer = void Function(
Pointer<SherpaOnnxCircularBuffer>);
/// Takes a buffer handle, a pointer to floats and a 32-bit integer (presumably
/// the number of floats — TODO confirm); returns nothing.
typedef SherpaOnnxCircularBufferPushNative = Void Function(
Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, Int32);
typedef SherpaOnnxCircularBufferPush = void Function(
Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, int);
/// Takes a buffer handle and two 32-bit integers (presumably a start index and
/// a count — TODO confirm) and returns a pointer to floats.
typedef SherpaOnnxCircularBufferGetNative = Pointer<Float> Function(
Pointer<SherpaOnnxCircularBuffer>, Int32, Int32);
typedef SherpaOnnxCircularBufferGet = Pointer<Float> Function(
Pointer<SherpaOnnxCircularBuffer>, int, int);
/// Takes a pointer to floats and returns nothing (presumably releases memory
/// returned by the `Get` call — TODO confirm).
typedef SherpaOnnxCircularBufferFreeNative = Void Function(Pointer<Float>);
typedef SherpaOnnxCircularBufferFree = void Function(Pointer<Float>);
/// Takes a buffer handle and one 32-bit integer (presumably the number of
/// elements to drop — TODO confirm); returns nothing.
typedef SherpaOnnxCircularBufferPopNative = Void Function(
Pointer<SherpaOnnxCircularBuffer>, Int32);
typedef SherpaOnnxCircularBufferPop = void Function(
Pointer<SherpaOnnxCircularBuffer>, int);
/// Returns a 32-bit integer for the given buffer handle.
typedef SherpaOnnxCircularBufferSizeNative = Int32 Function(
Pointer<SherpaOnnxCircularBuffer>);
typedef SherpaOnnxCircularBufferSize = int Function(
Pointer<SherpaOnnxCircularBuffer>);
/// Returns a 32-bit integer for the given buffer handle.
typedef SherpaOnnxCircularBufferHeadNative = Int32 Function(
Pointer<SherpaOnnxCircularBuffer>);
typedef SherpaOnnxCircularBufferHead = int Function(
Pointer<SherpaOnnxCircularBuffer>);
/// Takes a buffer handle; returns nothing.
typedef SherpaOnnxCircularBufferResetNative = Void Function(
Pointer<SherpaOnnxCircularBuffer>);
typedef SherpaOnnxCircularBufferReset = void Function(
Pointer<SherpaOnnxCircularBuffer>);
/// Native signature for creating a speaker embedding manager.
///
/// Takes a single 32-bit signed integer (presumably the embedding dimension —
/// confirm against the C header) and returns a pointer to the opaque manager
/// handle. The parameter is left unnamed, consistent with the other function
/// typedefs in this file.
typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative
    = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32);

/// Dart-side counterpart of [SherpaOnnxCreateSpeakerEmbeddingManagerNative],
/// with the native `Int32` argument surfaced as a Dart `int`.
typedef SherpaOnnxCreateSpeakerEmbeddingManager
    = Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int);
/// Native signature that takes a speaker embedding manager handle and returns
/// nothing.
typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function(
Pointer<SherpaOnnxSpeakerEmbeddingManager>);
@@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
/// Dart-side signature that takes a pointer to a [SherpaOnnxWave] and returns
/// nothing.
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
class SherpaOnnxBindings {
static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector;
static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector;
static SherpaOnnxVoiceActivityDetectorAcceptWaveform?
voiceActivityDetectorAcceptWaveform;
static SherpaOnnxVoiceActivityDetectorEmpty? voiceActivityDetectorEmpty;
static SherpaOnnxVoiceActivityDetectorDetected? voiceActivityDetectorDetected;
static SherpaOnnxVoiceActivityDetectorPop? voiceActivityDetectorPop;
static SherpaOnnxVoiceActivityDetectorClear? voiceActivityDetectorClear;
static SherpaOnnxVoiceActivityDetectorFront? voiceActivityDetectorFront;
static SherpaOnnxDestroySpeechSegment? destroySpeechSegment;
static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
static SherpaOnnxCircularBufferPush? circularBufferPush;
static SherpaOnnxCircularBufferGet? circularBufferGet;
static SherpaOnnxCircularBufferFree? circularBufferFree;
static SherpaOnnxCircularBufferPop? circularBufferPop;
static SherpaOnnxCircularBufferSize? circularBufferSize;
static SherpaOnnxCircularBufferHead? circularBufferHead;
static SherpaOnnxCircularBufferReset? circularBufferReset;
static SherpaOnnxCreateSpeakerEmbeddingExtractor?
createSpeakerEmbeddingExtractor;
@@ -252,8 +451,107 @@ class SherpaOnnxBindings {
static SherpaOnnxFreeWave? freeWave;
static void init(DynamicLibrary dynamicLibrary) {
createVoiceActivityDetector ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>(
'SherpaOnnxCreateVoiceActivityDetector')
.asFunction();
destroyVoiceActivityDetector ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxDestroyVoiceActivityDetectorNative>>(
'SherpaOnnxDestroyVoiceActivityDetector')
.asFunction();
voiceActivityDetectorAcceptWaveform ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxVoiceActivityDetectorAcceptWaveformNative>>(
'SherpaOnnxVoiceActivityDetectorAcceptWaveform')
.asFunction();
voiceActivityDetectorEmpty ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorEmptyNative>>(
'SherpaOnnxVoiceActivityDetectorEmpty')
.asFunction();
voiceActivityDetectorDetected ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorDetectedNative>>(
'SherpaOnnxVoiceActivityDetectorDetected')
.asFunction();
voiceActivityDetectorPop ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorPopNative>>(
'SherpaOnnxVoiceActivityDetectorPop')
.asFunction();
voiceActivityDetectorClear ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorClearNative>>(
'SherpaOnnxVoiceActivityDetectorClear')
.asFunction();
voiceActivityDetectorFront ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFrontNative>>(
'SherpaOnnxVoiceActivityDetectorFront')
.asFunction();
destroySpeechSegment ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxDestroySpeechSegmentNative>>(
'SherpaOnnxDestroySpeechSegment')
.asFunction();
voiceActivityDetectorReset ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorResetNative>>(
'SherpaOnnxVoiceActivityDetectorReset')
.asFunction();
createCircularBuffer ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
'SherpaOnnxCreateCircularBuffer')
.asFunction();
destroyCircularBuffer ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxDestroyCircularBufferNative>>(
'SherpaOnnxDestroyCircularBuffer')
.asFunction();
circularBufferPush ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferPushNative>>(
'SherpaOnnxCircularBufferPush')
.asFunction();
circularBufferGet ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferGetNative>>(
'SherpaOnnxCircularBufferGet')
.asFunction();
circularBufferFree ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferFreeNative>>(
'SherpaOnnxCircularBufferFree')
.asFunction();
circularBufferPop ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferPopNative>>(
'SherpaOnnxCircularBufferPop')
.asFunction();
circularBufferSize ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferSizeNative>>(
'SherpaOnnxCircularBufferSize')
.asFunction();
circularBufferHead ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferHeadNative>>(
'SherpaOnnxCircularBufferHead')
.asFunction();
circularBufferReset ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCircularBufferResetNative>>(
'SherpaOnnxCircularBufferReset')
.asFunction();
// Look the symbol up with the *native* signature typedef: the type argument
// of `NativeFunction<>` must be written in dart:ffi native types, while
// `asFunction()` converts it to the Dart-side callable stored in the field.
// `??=` keeps an already-resolved binding untouched.
createSpeakerEmbeddingExtractor ??= dynamicLibrary
    .lookup<
            NativeFunction<
                SherpaOnnxCreateSpeakerEmbeddingExtractorNative>>(
        'SherpaOnnxCreateSpeakerEmbeddingExtractor')
    .asFunction();