Add Dart API for VAD (#904)
This commit is contained in:
1
.github/workflows/flutter.yaml
vendored
1
.github/workflows/flutter.yaml
vendored
@@ -123,6 +123,7 @@ jobs:
|
||||
pushd example/assets
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
git clone https://github.com/csukuangfj/sr-data
|
||||
|
||||
rm -rf sr-data/.git
|
||||
|
||||
2
sherpa-onnx/flutter/.gitignore
vendored
2
sherpa-onnx/flutter/.gitignore
vendored
@@ -143,3 +143,5 @@ xcuserdata/
|
||||
## Xcode 8 and earlier
|
||||
*.xcscmblueprint
|
||||
*.xccheckout
|
||||
|
||||
flutter_jank_metrics*.json
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
# switch to this directory and run
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
git clone https://github.com/csukuangfj/sr-data
|
||||
|
||||
rm -rf sr-data/.git
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
import 'package:flutter/material.dart';
|
||||
|
||||
import "./speaker_identification_test.dart";
|
||||
import "./vad_test.dart";
|
||||
|
||||
void main() {
|
||||
runApp(const MyApp());
|
||||
@@ -51,6 +53,7 @@ class _MyHomePageState extends State<MyHomePage> {
|
||||
if (_counter <= 10) {
|
||||
sherpa_onnx.initBindings();
|
||||
await testSpeakerID();
|
||||
// await testVad();
|
||||
}
|
||||
|
||||
setState(() {
|
||||
|
||||
62
sherpa-onnx/flutter/example/lib/vad_test.dart
Normal file
62
sherpa-onnx/flutter/example/lib/vad_test.dart
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:typed_data';
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
import './utils.dart';
|
||||
|
||||
/// Exercises the VAD Dart bindings from the example app.
///
/// Obtains a local path for the bundled Silero VAD model via `copyAssetFile`,
/// creates and immediately frees a `VoiceActivityDetector`, then checks
/// push/get/pop/reset on a `CircularBuffer`. All checks are `assert`s, so
/// they only run when assertions are enabled.
Future<void> testVad() async {
  const assetPath = 'assets/silero_vad.onnx';
  final modelPath = await copyAssetFile(src: assetPath, dst: 'silero_vad.onnx');

  // Build the detector straight from nested config objects.
  final detector = sherpa_onnx.VoiceActivityDetector(
    config: sherpa_onnx.VadModelConfig(
      sileroVad: sherpa_onnx.SileroVadModelConfig(model: modelPath),
      numThreads: 1,
      debug: true,
    ),
    bufferSizeInSeconds: 10,
  );
  print('before vad.free(): ${detector.ptr}');
  detector.free();
  print('after vad.free(): ${detector.ptr}');

  final ring = sherpa_onnx.CircularBuffer(capacity: 16000 * 2);

  // First push: the buffer size must equal the number of pushed samples.
  final firstChunk = Float32List.fromList([0, 10, 20, 30]);
  ring.push(firstChunk);
  assert(
      firstChunk.length == ring.size, '${firstChunk.length} vs ${ring.size}');

  // Second push: the size grows by the length of the second chunk.
  final secondChunk = Float32List.fromList([-5, 100.25, 599]);
  ring.push(secondChunk);
  final totalPushed = firstChunk.length + secondChunk.length;
  assert(ring.size == totalPushed);

  // Reading does not consume: the first five samples come back in order and
  // the size is unchanged afterwards.
  const expectedPrefix = <double>[0, 10, 20, 30, -5];
  final prefix = ring.get(startIndex: 0, n: 5);
  assert(prefix.length == 5);
  for (var i = 0; i < expectedPrefix.length; ++i) {
    assert(prefix[i] == expectedPrefix[i]);
  }
  assert(ring.size == totalPushed);

  // Dropping three samples shrinks the buffer; reading from the new head
  // yields the remaining four samples.
  ring.pop(3);
  assert(ring.size == totalPushed - 3);

  const expectedTail = <double>[30, -5, 100.25, 599];
  final tail = ring.get(startIndex: ring.head, n: 4);
  assert(tail.length == 4);
  for (var i = 0; i < expectedTail.length; ++i) {
    assert(tail[i] == expectedTail[i]);
  }

  // Reset returns the buffer to its initial empty state.
  ring.reset();
  assert(ring.size == 0);
  assert(ring.head == 0);

  print('before free: ${ring.ptr}');
  ring.free();
  print('after free: ${ring.ptr}');
}
|
||||
@@ -2,10 +2,11 @@
|
||||
import 'dart:io';
|
||||
import 'dart:ffi';
|
||||
|
||||
import 'src/sherpa_onnx_bindings.dart';
|
||||
export 'src/speaker_identification.dart';
|
||||
export 'src/online_stream.dart';
|
||||
export 'src/speaker_identification.dart';
|
||||
export 'src/vad.dart';
|
||||
export 'src/wave_reader.dart';
|
||||
import 'src/sherpa_onnx_bindings.dart';
|
||||
|
||||
final DynamicLibrary _dylib = () {
|
||||
if (Platform.isIOS) {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:typed_data';
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
import 'package:ffi/ffi.dart';
|
||||
import "./sherpa_onnx_bindings.dart";
|
||||
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
|
||||
class OnlineStream {
|
||||
/// The user has to call OnlineStream.free() to avoid memory leak.
|
||||
|
||||
@@ -2,6 +2,47 @@
|
||||
import 'dart:ffi';
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
/// `dart:ffi` struct describing the Silero VAD model settings that are passed
/// to native code (embedded in [SherpaOnnxVadModelConfig]).
///
/// NOTE(review): presumably mirrors the C struct of the same name; the field
/// order and native widths define the memory layout, so do not reorder.
final class SherpaOnnxSileroVadModelConfig extends Struct {
  /// Native UTF-8 string with the model location.
  external Pointer<Utf8> model;

  /// Stored as a 32-bit native float.
  @Float()
  external double threshold;

  /// Stored as a 32-bit native float.
  @Float()
  external double minSilenceDuration;

  /// Stored as a 32-bit native float.
  @Float()
  external double minSpeechDuration;

  /// Stored as a 32-bit native integer.
  @Int32()
  external int windowSize;
}
|
||||
|
||||
/// `dart:ffi` struct holding the full VAD configuration handed to the native
/// detector constructor.
///
/// NOTE(review): presumably mirrors the C struct of the same name; the field
/// order and native widths define the memory layout, so do not reorder.
final class SherpaOnnxVadModelConfig extends Struct {
  /// Silero model settings, embedded by value (not a pointer).
  external SherpaOnnxSileroVadModelConfig sileroVad;

  /// Stored as a 32-bit native integer.
  @Int32()
  external int sampleRate;

  /// Stored as a 32-bit native integer.
  @Int32()
  external int numThreads;

  /// Native UTF-8 string naming the execution provider.
  external Pointer<Utf8> provider;

  /// Boolean flag encoded as a 32-bit integer (the Dart wrapper writes 1 for
  /// `true` and 0 for `false`).
  @Int32()
  external int debug;
}
|
||||
|
||||
/// `dart:ffi` struct for a speech segment returned by the native detector.
///
/// NOTE(review): presumably mirrors the C struct of the same name; the field
/// order and native widths define the memory layout, so do not reorder.
final class SherpaOnnxSpeechSegment extends Struct {
  /// Start position of the segment, stored as a 32-bit native integer.
  @Int32()
  external int start;

  /// Pointer to the first of [n] native floats.
  external Pointer<Float> samples;

  /// Number of floats available behind [samples].
  @Int32()
  external int n;
}
|
||||
|
||||
final class SherpaOnnxWave extends Struct {
|
||||
external Pointer<Float> samples;
|
||||
|
||||
@@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
|
||||
external Pointer<Utf8> provider;
|
||||
}
|
||||
|
||||
// Opaque native handle types. They carry no Dart-visible fields and are only
// ever used as the target type of a [Pointer].

/// Handle type for a native circular buffer.
final class SherpaOnnxCircularBuffer extends Opaque {}

/// Handle type for a native voice activity detector.
final class SherpaOnnxVoiceActivityDetector extends Opaque {}

/// Handle type for a native online stream.
final class SherpaOnnxOnlineStream extends Opaque {}

/// Handle type for a native speaker embedding extractor.
final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}

/// Handle type for a native speaker embedding manager.
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
|
||||
|
||||
// Function signatures for the VAD and circular-buffer entry points.
//
// Each native symbol gets two typedefs: `...Native` spells the signature with
// `dart:ffi` native types (used for the symbol lookup) and the un-suffixed
// name spells the same signature with Dart types (used after `asFunction`).

// ---------------------------------------------------------------------------
// Voice activity detector
// ---------------------------------------------------------------------------

/// Creates a detector from a config struct and a float argument.
typedef SherpaOnnxCreateVoiceActivityDetectorNative
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, Float);

typedef SherpaOnnxCreateVoiceActivityDetector
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, double);

/// Destroys a detector.
typedef SherpaOnnxDestroyVoiceActivityDetectorNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxDestroyVoiceActivityDetector = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Passes a float array and its length to a detector.
typedef SherpaOnnxVoiceActivityDetectorAcceptWaveformNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, Int32);

typedef SherpaOnnxVoiceActivityDetectorAcceptWaveform = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, int);

/// Integer-valued "empty" query on a detector.
typedef SherpaOnnxVoiceActivityDetectorEmptyNative = Int32 Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorEmpty = int Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Integer-valued "detected" query on a detector.
typedef SherpaOnnxVoiceActivityDetectorDetectedNative = Int32 Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorDetected = int Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// "Pop" operation on a detector.
typedef SherpaOnnxVoiceActivityDetectorPopNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorPop = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// "Clear" operation on a detector.
typedef SherpaOnnxVoiceActivityDetectorClearNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorClear = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// "Reset" operation on a detector.
typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Returns a pointer to a speech segment from a detector.
typedef SherpaOnnxVoiceActivityDetectorFrontNative
    = Pointer<SherpaOnnxSpeechSegment> Function(
        Pointer<SherpaOnnxVoiceActivityDetector>);

// Only pointer types are involved, so the Dart signature is the native one.
typedef SherpaOnnxVoiceActivityDetectorFront
    = SherpaOnnxVoiceActivityDetectorFrontNative;

/// Destroys a speech segment.
typedef SherpaOnnxDestroySpeechSegmentNative = Void Function(
    Pointer<SherpaOnnxSpeechSegment>);

typedef SherpaOnnxDestroySpeechSegment = void Function(
    Pointer<SherpaOnnxSpeechSegment>);

// ---------------------------------------------------------------------------
// Circular buffer
// ---------------------------------------------------------------------------

/// Creates a circular buffer from a 32-bit integer argument.
typedef SherpaOnnxCreateCircularBufferNative = Pointer<SherpaOnnxCircularBuffer>
    Function(Int32);

typedef SherpaOnnxCreateCircularBuffer = Pointer<SherpaOnnxCircularBuffer>
    Function(int);

/// Destroys a circular buffer.
typedef SherpaOnnxDestroyCircularBufferNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxDestroyCircularBuffer = void Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// Passes a float array and its length to a circular buffer.
typedef SherpaOnnxCircularBufferPushNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, Int32);

typedef SherpaOnnxCircularBufferPush = void Function(
    Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, int);

/// Returns a float array for two 32-bit integer arguments.
typedef SherpaOnnxCircularBufferGetNative = Pointer<Float> Function(
    Pointer<SherpaOnnxCircularBuffer>, Int32, Int32);

typedef SherpaOnnxCircularBufferGet = Pointer<Float> Function(
    Pointer<SherpaOnnxCircularBuffer>, int, int);

/// Releases a float array.
typedef SherpaOnnxCircularBufferFreeNative = Void Function(Pointer<Float>);

typedef SherpaOnnxCircularBufferFree = void Function(Pointer<Float>);

/// "Pop" operation on a circular buffer with a 32-bit integer argument.
typedef SherpaOnnxCircularBufferPopNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>, Int32);

typedef SherpaOnnxCircularBufferPop = void Function(
    Pointer<SherpaOnnxCircularBuffer>, int);

/// Integer-valued "size" query on a circular buffer.
typedef SherpaOnnxCircularBufferSizeNative = Int32 Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferSize = int Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// Integer-valued "head" query on a circular buffer.
typedef SherpaOnnxCircularBufferHeadNative = Int32 Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferHead = int Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// "Reset" operation on a circular buffer.
typedef SherpaOnnxCircularBufferResetNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferReset = void Function(
    Pointer<SherpaOnnxCircularBuffer>);
|
||||
|
||||
typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative
|
||||
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32 dim);
|
||||
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32);
|
||||
|
||||
typedef SherpaOnnxCreateSpeakerEmbeddingManager
|
||||
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int dim);
|
||||
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int);
|
||||
|
||||
typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function(
|
||||
Pointer<SherpaOnnxSpeakerEmbeddingManager>);
|
||||
@@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
|
||||
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
|
||||
|
||||
class SherpaOnnxBindings {
|
||||
static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector;
|
||||
|
||||
static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorAcceptWaveform?
|
||||
voiceActivityDetectorAcceptWaveform;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorEmpty? voiceActivityDetectorEmpty;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorDetected? voiceActivityDetectorDetected;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorPop? voiceActivityDetectorPop;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorClear? voiceActivityDetectorClear;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorFront? voiceActivityDetectorFront;
|
||||
|
||||
static SherpaOnnxDestroySpeechSegment? destroySpeechSegment;
|
||||
|
||||
static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
|
||||
|
||||
static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
|
||||
|
||||
static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
|
||||
|
||||
static SherpaOnnxCircularBufferPush? circularBufferPush;
|
||||
|
||||
static SherpaOnnxCircularBufferGet? circularBufferGet;
|
||||
|
||||
static SherpaOnnxCircularBufferFree? circularBufferFree;
|
||||
|
||||
static SherpaOnnxCircularBufferPop? circularBufferPop;
|
||||
|
||||
static SherpaOnnxCircularBufferSize? circularBufferSize;
|
||||
|
||||
static SherpaOnnxCircularBufferHead? circularBufferHead;
|
||||
|
||||
static SherpaOnnxCircularBufferReset? circularBufferReset;
|
||||
|
||||
static SherpaOnnxCreateSpeakerEmbeddingExtractor?
|
||||
createSpeakerEmbeddingExtractor;
|
||||
|
||||
@@ -252,8 +451,107 @@ class SherpaOnnxBindings {
|
||||
static SherpaOnnxFreeWave? freeWave;
|
||||
|
||||
static void init(DynamicLibrary dynamicLibrary) {
|
||||
createVoiceActivityDetector ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>(
|
||||
'SherpaOnnxCreateVoiceActivityDetector')
|
||||
.asFunction();
|
||||
|
||||
destroyVoiceActivityDetector ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxDestroyVoiceActivityDetectorNative>>(
|
||||
'SherpaOnnxDestroyVoiceActivityDetector')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorAcceptWaveform ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxVoiceActivityDetectorAcceptWaveformNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorAcceptWaveform')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorEmpty ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorEmptyNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorEmpty')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorDetected ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorDetectedNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorDetected')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorPop ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorPopNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorPop')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorClear ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorClearNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorClear')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorFront ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFrontNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorFront')
|
||||
.asFunction();
|
||||
|
||||
destroySpeechSegment ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxDestroySpeechSegmentNative>>(
|
||||
'SherpaOnnxDestroySpeechSegment')
|
||||
.asFunction();
|
||||
|
||||
voiceActivityDetectorReset ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorResetNative>>(
|
||||
'SherpaOnnxVoiceActivityDetectorReset')
|
||||
.asFunction();
|
||||
|
||||
createCircularBuffer ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
|
||||
'SherpaOnnxCreateCircularBuffer')
|
||||
.asFunction();
|
||||
|
||||
destroyCircularBuffer ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxDestroyCircularBufferNative>>(
|
||||
'SherpaOnnxDestroyCircularBuffer')
|
||||
.asFunction();
|
||||
|
||||
circularBufferPush ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferPushNative>>(
|
||||
'SherpaOnnxCircularBufferPush')
|
||||
.asFunction();
|
||||
|
||||
circularBufferGet ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferGetNative>>(
|
||||
'SherpaOnnxCircularBufferGet')
|
||||
.asFunction();
|
||||
|
||||
circularBufferFree ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferFreeNative>>(
|
||||
'SherpaOnnxCircularBufferFree')
|
||||
.asFunction();
|
||||
|
||||
circularBufferPop ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferPopNative>>(
|
||||
'SherpaOnnxCircularBufferPop')
|
||||
.asFunction();
|
||||
|
||||
circularBufferSize ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferSizeNative>>(
|
||||
'SherpaOnnxCircularBufferSize')
|
||||
.asFunction();
|
||||
|
||||
circularBufferHead ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferHeadNative>>(
|
||||
'SherpaOnnxCircularBufferHead')
|
||||
.asFunction();
|
||||
|
||||
circularBufferReset ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCircularBufferResetNative>>(
|
||||
'SherpaOnnxCircularBufferReset')
|
||||
.asFunction();
|
||||
|
||||
createSpeakerEmbeddingExtractor ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCreateSpeakerEmbeddingExtractor>>(
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxCreateSpeakerEmbeddingExtractorNative>>(
|
||||
'SherpaOnnxCreateSpeakerEmbeddingExtractor')
|
||||
.asFunction();
|
||||
|
||||
|
||||
@@ -2,19 +2,20 @@
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
import 'package:ffi/ffi.dart';
|
||||
import "./sherpa_onnx_bindings.dart";
|
||||
import "./online_stream.dart";
|
||||
|
||||
import './online_stream.dart';
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
|
||||
class SpeakerEmbeddingExtractorConfig {
|
||||
const SpeakerEmbeddingExtractorConfig(
|
||||
{required this.model,
|
||||
this.numThreads = 1,
|
||||
this.debug = true,
|
||||
this.provider = "cpu"});
|
||||
this.provider = 'cpu'});
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
return "SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)";
|
||||
return 'SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)';
|
||||
}
|
||||
|
||||
final String model;
|
||||
@@ -116,7 +117,7 @@ class SpeakerEmbeddingManager {
|
||||
|
||||
/// Return true if added successfully; return false otherwise
|
||||
bool add({required String name, required Float32List embedding}) {
|
||||
assert(embedding.length == this.dim, "${embedding.length} vs ${this.dim}");
|
||||
assert(embedding.length == this.dim, '${embedding.length} vs ${this.dim}');
|
||||
|
||||
final Pointer<Utf8> namePtr = name.toNativeUtf8();
|
||||
final int n = embedding.length;
|
||||
@@ -145,7 +146,7 @@ class SpeakerEmbeddingManager {
|
||||
|
||||
int offset = 0;
|
||||
for (final e in embeddingList) {
|
||||
assert(e.length == this.dim, "${e.length} vs ${this.dim}");
|
||||
assert(e.length == this.dim, '${e.length} vs ${this.dim}');
|
||||
|
||||
pList.setAll(offset, e);
|
||||
offset += this.dim;
|
||||
|
||||
213
sherpa-onnx/flutter/lib/src/vad.dart
Normal file
213
sherpa-onnx/flutter/lib/src/vad.dart
Normal file
@@ -0,0 +1,213 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
|
||||
/// Immutable settings for the Silero VAD model.
///
/// Every value is copied verbatim into the native config struct when a
/// `VoiceActivityDetector` is created.
class SileroVadModelConfig {
  /// Location of the model; defaults to the empty string.
  final String model;

  /// Defaults to 0.5.
  final double threshold;

  /// Defaults to 0.5.
  final double minSilenceDuration;

  /// Defaults to 0.25.
  final double minSpeechDuration;

  /// Defaults to 512.
  final int windowSize;

  /// Creates a config; every argument is optional.
  const SileroVadModelConfig({
    this.model = '',
    this.threshold = 0.5,
    this.minSilenceDuration = 0.5,
    this.minSpeechDuration = 0.25,
    this.windowSize = 512,
  });

  /// Returns a single-line, human-readable dump of all fields.
  @override
  String toString() => 'SileroVadModelConfig('
      'model: $model, '
      'threshold: $threshold, '
      'minSilenceDuration: $minSilenceDuration, '
      'minSpeechDuration: $minSpeechDuration, '
      'windowSize: $windowSize)';
}
|
||||
|
||||
/// Immutable top-level configuration for a `VoiceActivityDetector`.
///
/// Every value is copied into the native config struct when the detector is
/// created.
class VadModelConfig {
  /// Creates a config; every argument is optional.
  ///
  /// The constructor is `const` (all fields are final), matching the other
  /// config classes in this package so instances can be compile-time
  /// constants.
  const VadModelConfig(
      {this.sileroVad = const SileroVadModelConfig(),
      this.sampleRate = 16000,
      this.numThreads = 1,
      this.provider = 'cpu',
      this.debug = true});

  /// Returns a single-line, human-readable dump of all fields.
  @override
  String toString() {
    return 'VadModelConfig(sileroVad: $sileroVad, sampleRate: $sampleRate, numThreads: $numThreads, provider: $provider, debug: $debug)';
  }

  /// Settings of the Silero VAD model.
  final SileroVadModelConfig sileroVad;

  /// Defaults to 16000.
  final int sampleRate;

  /// Defaults to 1.
  final int numThreads;

  /// Name of the execution provider; defaults to `'cpu'`.
  final String provider;

  /// Passed to native code as 1 when true and 0 when false; defaults to true.
  final bool debug;
}
|
||||
|
||||
/// A speech segment copied out of the native detector.
class SpeechSegment {
  /// Samples of the segment, owned by Dart (a copy of the native data).
  final Float32List samples;

  /// Start position reported by the native segment.
  final int start;

  /// Creates a segment from already-copied [samples] and its [start].
  SpeechSegment({
    required this.samples,
    required this.start,
  });
}
|
||||
|
||||
/// Dart wrapper around a native circular buffer of float samples.
///
/// All operations are forwarded to the functions registered in
/// [SherpaOnnxBindings]; when a binding has not been loaded the call is a
/// no-op (or returns an empty/zero result).
class CircularBuffer {
  CircularBuffer._({required this.ptr});

  /// Creates a native circular buffer, forwarding [capacity] to native code.
  ///
  /// The user has to invoke [free] on the returned instance to avoid a
  /// memory leak.
  factory CircularBuffer({required int capacity}) {
    assert(capacity > 0, 'capacity is $capacity');
    final p =
        SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr;

    return CircularBuffer._(ptr: p);
  }

  /// Destroys the native buffer and clears [ptr].
  ///
  /// Calling it again, or on an instance that was never created natively,
  /// does nothing.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyCircularBuffer?.call(ptr);
    ptr = nullptr;
  }

  /// Appends [data] to the buffer.
  ///
  /// The samples are staged in a temporary native array for the duration of
  /// the call. An empty [data] is ignored.
  void push(Float32List data) {
    final n = data.length;
    if (n == 0) {
      // Nothing to copy; also avoids a zero-byte native allocation, which the
      // allocator reports as a failure when the platform returns NULL for it.
      return;
    }

    final Pointer<Float> p = calloc<Float>(n);
    try {
      p.asTypedList(n).setAll(0, data);
      SherpaOnnxBindings.circularBufferPush?.call(ptr, p, n);
    } finally {
      // Release the staging array even if the copy or the call throws.
      calloc.free(p);
    }
  }

  /// Returns a Dart-owned copy of [n] samples starting at [startIndex].
  ///
  /// Returns an empty list when native code yields a null pointer or the
  /// binding is not loaded. Reading does not remove samples from the buffer.
  Float32List get({required int startIndex, required int n}) {
    final Pointer<Float> p =
        SherpaOnnxBindings.circularBufferGet?.call(ptr, startIndex, n) ??
            nullptr;

    if (p == nullptr) {
      return Float32List(0);
    }

    try {
      // Copy first: the native array is released right after.
      return Float32List.fromList(p.asTypedList(n));
    } finally {
      SherpaOnnxBindings.circularBufferFree?.call(p);
    }
  }

  /// Removes [n] samples from the buffer.
  void pop(int n) {
    SherpaOnnxBindings.circularBufferPop?.call(ptr, n);
  }

  /// Resets the native buffer.
  void reset() {
    SherpaOnnxBindings.circularBufferReset?.call(ptr);
  }

  /// Size reported by native code; 0 when the binding is not loaded.
  int get size => SherpaOnnxBindings.circularBufferSize?.call(ptr) ?? 0;

  /// Head index reported by native code; 0 when the binding is not loaded.
  int get head => SherpaOnnxBindings.circularBufferHead?.call(ptr) ?? 0;

  /// Native handle; `nullptr` after [free] or when creation was not possible.
  Pointer<SherpaOnnxCircularBuffer> ptr;
}
|
||||
|
||||
/// Dart wrapper around a native voice activity detector.
///
/// All operations are forwarded to the functions registered in
/// [SherpaOnnxBindings]; when a binding has not been loaded the call is a
/// no-op (or returns an empty result).
class VoiceActivityDetector {
  VoiceActivityDetector._({required this.ptr});

  /// Creates a native detector from [config], forwarding
  /// [bufferSizeInSeconds] to native code.
  ///
  /// The user has to invoke [free] to avoid a memory leak.
  factory VoiceActivityDetector(
      {required VadModelConfig config, required double bufferSizeInSeconds}) {
    final c = calloc<SherpaOnnxVadModelConfig>();
    final modelPtr = config.sileroVad.model.toNativeUtf8();
    final providerPtr = config.provider.toNativeUtf8();

    try {
      c.ref.sileroVad.model = modelPtr;
      c.ref.sileroVad.threshold = config.sileroVad.threshold;
      c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
      c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
      c.ref.sileroVad.windowSize = config.sileroVad.windowSize;

      c.ref.sampleRate = config.sampleRate;
      c.ref.numThreads = config.numThreads;
      c.ref.provider = providerPtr;
      c.ref.debug = config.debug ? 1 : 0;

      final ptr = SherpaOnnxBindings.createVoiceActivityDetector
              ?.call(c, bufferSizeInSeconds) ??
          nullptr;

      return VoiceActivityDetector._(ptr: ptr);
    } finally {
      // The config is only needed for the duration of the native call;
      // release it even if something above throws.
      calloc.free(providerPtr);
      calloc.free(modelPtr);
      calloc.free(c);
    }
  }

  /// Destroys the native detector and clears [ptr].
  ///
  /// Calling it again, or on an instance that was never created natively,
  /// does nothing.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyVoiceActivityDetector?.call(ptr);
    ptr = nullptr;
  }

  /// Feeds [samples] to the detector.
  ///
  /// The samples are staged in a temporary native array for the duration of
  /// the call. An empty [samples] is ignored.
  void acceptWaveform(Float32List samples) {
    final n = samples.length;
    if (n == 0) {
      // Nothing to copy; also avoids a zero-byte native allocation, which the
      // allocator reports as a failure when the platform returns NULL for it.
      return;
    }

    final Pointer<Float> p = calloc<Float>(n);
    try {
      p.asTypedList(n).setAll(0, samples);
      SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform?.call(ptr, p, n);
    } finally {
      // Release the staging array even if the copy or the call throws.
      calloc.free(p);
    }
  }

  /// Whether the native "empty" query returns 1.
  ///
  /// Reports `true` when the binding is not loaded, so that loops of the form
  /// `while (!vad.isEmpty()) { ... vad.pop(); }` terminate instead of
  /// spinning forever on a detector that cannot produce segments.
  bool isEmpty() {
    final int empty =
        SherpaOnnxBindings.voiceActivityDetectorEmpty?.call(ptr) ?? 1;

    return empty == 1;
  }

  /// Whether the native "detected" query returns 1; `false` when the binding
  /// is not loaded.
  bool isDetected() {
    final int detected =
        SherpaOnnxBindings.voiceActivityDetectorDetected?.call(ptr) ?? 0;

    return detected == 1;
  }

  /// Forwards to the native "pop" operation.
  void pop() {
    SherpaOnnxBindings.voiceActivityDetectorPop?.call(ptr);
  }

  /// Forwards to the native "clear" operation.
  void clear() {
    SherpaOnnxBindings.voiceActivityDetectorClear?.call(ptr);
  }

  /// Returns a Dart-owned copy of the segment produced by the native "front"
  /// operation.
  ///
  /// Returns an empty segment with `start == 0` when native code yields a
  /// null pointer or the binding is not loaded. This method does not call
  /// [pop].
  SpeechSegment front() {
    final Pointer<SherpaOnnxSpeechSegment> segment =
        SherpaOnnxBindings.voiceActivityDetectorFront?.call(ptr) ?? nullptr;
    if (segment == nullptr) {
      return SpeechSegment(samples: Float32List(0), start: 0);
    }

    try {
      // Copy first: the native segment is destroyed right after.
      final samples =
          Float32List.fromList(segment.ref.samples.asTypedList(segment.ref.n));

      return SpeechSegment(samples: samples, start: segment.ref.start);
    } finally {
      SherpaOnnxBindings.destroySpeechSegment?.call(segment);
    }
  }

  /// Forwards to the native "reset" operation.
  void reset() {
    SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
  }

  /// Native handle; `nullptr` after [free] or when creation was not possible.
  Pointer<SherpaOnnxVoiceActivityDetector> ptr;
}
|
||||
@@ -2,7 +2,8 @@
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
import 'package:ffi/ffi.dart';
|
||||
import "./sherpa_onnx_bindings.dart";
|
||||
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
|
||||
class WaveData {
|
||||
WaveData({required this.samples, required this.sampleRate});
|
||||
|
||||
Reference in New Issue
Block a user