Add Dart API for VAD (#904)
This commit is contained in:
1
.github/workflows/flutter.yaml
vendored
1
.github/workflows/flutter.yaml
vendored
@@ -123,6 +123,7 @@ jobs:
|
|||||||
pushd example/assets
|
pushd example/assets
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
git clone https://github.com/csukuangfj/sr-data
|
git clone https://github.com/csukuangfj/sr-data
|
||||||
|
|
||||||
rm -rf sr-data/.git
|
rm -rf sr-data/.git
|
||||||
|
|||||||
2
sherpa-onnx/flutter/.gitignore
vendored
2
sherpa-onnx/flutter/.gitignore
vendored
@@ -143,3 +143,5 @@ xcuserdata/
|
|||||||
## Xcode 8 and earlier
|
## Xcode 8 and earlier
|
||||||
*.xcscmblueprint
|
*.xcscmblueprint
|
||||||
*.xccheckout
|
*.xccheckout
|
||||||
|
|
||||||
|
flutter_jank_metrics*.json
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
# switch to this directory and run
|
# switch to this directory and run
|
||||||
|
|
||||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
git clone https://github.com/csukuangfj/sr-data
|
git clone https://github.com/csukuangfj/sr-data
|
||||||
|
|
||||||
rm -rf sr-data/.git
|
rm -rf sr-data/.git
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
// Copyright (c) 2024 Xiaomi Corporation
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||||
import 'package:flutter/material.dart';
|
import 'package:flutter/material.dart';
|
||||||
|
|
||||||
import "./speaker_identification_test.dart";
|
import "./speaker_identification_test.dart";
|
||||||
|
import "./vad_test.dart";
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
runApp(const MyApp());
|
runApp(const MyApp());
|
||||||
@@ -51,6 +53,7 @@ class _MyHomePageState extends State<MyHomePage> {
|
|||||||
if (_counter <= 10) {
|
if (_counter <= 10) {
|
||||||
sherpa_onnx.initBindings();
|
sherpa_onnx.initBindings();
|
||||||
await testSpeakerID();
|
await testSpeakerID();
|
||||||
|
// await testVad();
|
||||||
}
|
}
|
||||||
|
|
||||||
setState(() {
|
setState(() {
|
||||||
|
|||||||
62
sherpa-onnx/flutter/example/lib/vad_test.dart
Normal file
62
sherpa-onnx/flutter/example/lib/vad_test.dart
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
import 'dart:typed_data';
|
||||||
|
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||||
|
import './utils.dart';
|
||||||
|
|
||||||
|
/// Smoke-tests the VAD and circular-buffer Dart bindings.
///
/// Copies the Silero VAD model out of the asset bundle, creates a
/// [sherpa_onnx.VoiceActivityDetector] and frees it immediately, then pushes,
/// reads, pops and resets a [sherpa_onnx.CircularBuffer], checking its state
/// with `assert`s (which only run when assertions are enabled).
Future<void> testVad() async {
  final src = 'assets/silero_vad.onnx';
  final modelPath = await copyAssetFile(src: src, dst: 'silero_vad.onnx');

  // Only construction and destruction of the detector are exercised here.
  final detector = sherpa_onnx.VoiceActivityDetector(
    config: sherpa_onnx.VadModelConfig(
      sileroVad: sherpa_onnx.SileroVadModelConfig(model: modelPath),
      numThreads: 1,
      debug: true,
    ),
    bufferSizeInSeconds: 10,
  );
  print('before vad.free(): ${detector.ptr}');
  detector.free();
  print('after vad.free(): ${detector.ptr}');

  final ring = sherpa_onnx.CircularBuffer(capacity: 16000 * 2);

  final firstChunk = Float32List.fromList([0, 10, 20, 30]);
  ring.push(firstChunk);
  assert(
      firstChunk.length == ring.size, '${firstChunk.length} vs ${ring.size}');

  final secondChunk = Float32List.fromList([-5, 100.25, 599]);
  ring.push(secondChunk);
  assert(ring.size == firstChunk.length + secondChunk.length);

  // Reading must return the samples in push order ...
  const expectedFromStart = <double>[0, 10, 20, 30, -5];
  final fromStart = ring.get(startIndex: 0, n: 5);
  assert(fromStart.length == 5);
  for (var i = 0; i < expectedFromStart.length; ++i) {
    assert(fromStart[i] == expectedFromStart[i]);
  }

  // ... and must not consume anything.
  assert(ring.size == firstChunk.length + secondChunk.length);

  ring.pop(3);
  assert(ring.size == firstChunk.length + secondChunk.length - 3);

  // After dropping three samples, reading from the head yields the rest.
  const expectedFromHead = <double>[30, -5, 100.25, 599];
  final fromHead = ring.get(startIndex: ring.head, n: 4);
  assert(fromHead.length == 4);
  for (var i = 0; i < expectedFromHead.length; ++i) {
    assert(fromHead[i] == expectedFromHead[i]);
  }

  ring.reset();
  assert(ring.size == 0);
  assert(ring.head == 0);

  print('before free: ${ring.ptr}');
  ring.free();
  print('after free: ${ring.ptr}');
}
|
||||||
@@ -2,10 +2,11 @@
|
|||||||
import 'dart:io';
|
import 'dart:io';
|
||||||
import 'dart:ffi';
|
import 'dart:ffi';
|
||||||
|
|
||||||
import 'src/sherpa_onnx_bindings.dart';
|
|
||||||
export 'src/speaker_identification.dart';
|
|
||||||
export 'src/online_stream.dart';
|
export 'src/online_stream.dart';
|
||||||
|
export 'src/speaker_identification.dart';
|
||||||
|
export 'src/vad.dart';
|
||||||
export 'src/wave_reader.dart';
|
export 'src/wave_reader.dart';
|
||||||
|
import 'src/sherpa_onnx_bindings.dart';
|
||||||
|
|
||||||
final DynamicLibrary _dylib = () {
|
final DynamicLibrary _dylib = () {
|
||||||
if (Platform.isIOS) {
|
if (Platform.isIOS) {
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
// Copyright (c) 2024 Xiaomi Corporation
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
import 'dart:typed_data';
|
|
||||||
import 'dart:ffi';
|
import 'dart:ffi';
|
||||||
|
import 'dart:typed_data';
|
||||||
import 'package:ffi/ffi.dart';
|
import 'package:ffi/ffi.dart';
|
||||||
import "./sherpa_onnx_bindings.dart";
|
|
||||||
|
import './sherpa_onnx_bindings.dart';
|
||||||
|
|
||||||
class OnlineStream {
|
class OnlineStream {
|
||||||
/// The user has to call OnlineStream.free() to avoid memory leak.
|
/// The user has to call OnlineStream.free() to avoid memory leak.
|
||||||
|
|||||||
@@ -2,6 +2,47 @@
|
|||||||
import 'dart:ffi';
|
import 'dart:ffi';
|
||||||
import 'package:ffi/ffi.dart';
|
import 'package:ffi/ffi.dart';
|
||||||
|
|
||||||
|
/// FFI struct holding the Silero VAD model settings.
///
/// It is embedded by value as the first member of [SherpaOnnxVadModelConfig].
/// The declaration order and the native width annotations define the memory
/// layout, so neither may be changed independently of the native side.
// NOTE(review): presumably mirrors the struct of the same name in the
// sherpa-onnx C API header; that header is not visible here - confirm.
final class SherpaOnnxSileroVadModelConfig extends Struct {
  /// Path of the model file as a NUL-terminated UTF-8 string.
  external Pointer<Utf8> model;

  /// 32-bit float.
  @Float()
  external double threshold;

  /// 32-bit float.
  @Float()
  external double minSilenceDuration;

  /// 32-bit float.
  @Float()
  external double minSpeechDuration;

  /// 32-bit signed integer.
  @Int32()
  external int windowSize;
}
|
||||||
|
|
||||||
|
/// FFI struct passed by pointer to `SherpaOnnxCreateVoiceActivityDetector`.
///
/// The declaration order and the native width annotations define the memory
/// layout, so neither may be changed independently of the native side.
// NOTE(review): presumably mirrors the struct of the same name in the
// sherpa-onnx C API header; that header is not visible here - confirm.
final class SherpaOnnxVadModelConfig extends Struct {
  /// Silero VAD settings, stored inline (by value, not by pointer).
  external SherpaOnnxSileroVadModelConfig sileroVad;

  /// 32-bit signed integer.
  @Int32()
  external int sampleRate;

  /// 32-bit signed integer.
  @Int32()
  external int numThreads;

  /// Execution provider name as a NUL-terminated UTF-8 string.
  external Pointer<Utf8> provider;

  /// Boolean flag carried as a 32-bit integer (the Dart wrapper writes 1 or 0).
  @Int32()
  external int debug;
}
|
||||||
|
|
||||||
|
/// FFI struct returned by pointer from `SherpaOnnxVoiceActivityDetectorFront`
/// and released with `SherpaOnnxDestroySpeechSegment`.
///
/// The declaration order and the native width annotations define the memory
/// layout, so neither may be changed independently of the native side.
final class SherpaOnnxSpeechSegment extends Struct {
  /// 32-bit signed integer.
  // NOTE(review): presumably the index of the segment's first sample within
  // the audio fed to the detector - confirm against the native API.
  @Int32()
  external int start;

  /// Pointer to [n] 32-bit float samples.
  external Pointer<Float> samples;

  /// Number of floats readable through [samples].
  @Int32()
  external int n;
}
|
||||||
|
|
||||||
final class SherpaOnnxWave extends Struct {
|
final class SherpaOnnxWave extends Struct {
|
||||||
external Pointer<Float> samples;
|
external Pointer<Float> samples;
|
||||||
|
|
||||||
@@ -24,17 +65,136 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
|
|||||||
external Pointer<Utf8> provider;
|
external Pointer<Utf8> provider;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Opaque handle type for a native circular buffer.
///
/// Only ever used behind a [Pointer]; Dart never looks inside it.
final class SherpaOnnxCircularBuffer extends Opaque {}

/// Opaque handle type for a native voice activity detector.
///
/// Only ever used behind a [Pointer]; Dart never looks inside it.
final class SherpaOnnxVoiceActivityDetector extends Opaque {}
|
||||||
|
|
||||||
final class SherpaOnnxOnlineStream extends Opaque {}
|
final class SherpaOnnxOnlineStream extends Opaque {}
|
||||||
|
|
||||||
final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
|
final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
|
||||||
|
|
||||||
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
|
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Voice activity detector.
//
// Every native symbol gets a pair of typedefs: `...Native` spells the C
// signature with dart:ffi native types, and the un-suffixed twin is the Dart
// function type that the looked-up symbol is converted to.
// ---------------------------------------------------------------------------

/// C signature of `SherpaOnnxCreateVoiceActivityDetector`: takes the config
/// by pointer plus a 32-bit float and returns a detector handle.
typedef SherpaOnnxCreateVoiceActivityDetectorNative
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, Float);

/// Dart signature of `SherpaOnnxCreateVoiceActivityDetector`.
typedef SherpaOnnxCreateVoiceActivityDetector
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, double);

/// C signature of `SherpaOnnxDestroyVoiceActivityDetector`.
typedef SherpaOnnxDestroyVoiceActivityDetectorNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxDestroyVoiceActivityDetector`.
typedef SherpaOnnxDestroyVoiceActivityDetector = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// C signature of `SherpaOnnxVoiceActivityDetectorAcceptWaveform`: detector
/// handle, pointer to float samples, and a 32-bit sample count.
typedef SherpaOnnxVoiceActivityDetectorAcceptWaveformNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, Int32);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorAcceptWaveform`.
typedef SherpaOnnxVoiceActivityDetectorAcceptWaveform = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>, Pointer<Float>, int);

/// C signature of `SherpaOnnxVoiceActivityDetectorEmpty`; the result is a
/// 32-bit integer that the Dart wrapper compares against 1.
typedef SherpaOnnxVoiceActivityDetectorEmptyNative = Int32 Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorEmpty`.
typedef SherpaOnnxVoiceActivityDetectorEmpty = int Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// C signature of `SherpaOnnxVoiceActivityDetectorDetected`; the result is a
/// 32-bit integer that the Dart wrapper compares against 1.
typedef SherpaOnnxVoiceActivityDetectorDetectedNative = Int32 Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorDetected`.
typedef SherpaOnnxVoiceActivityDetectorDetected = int Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// C signature of `SherpaOnnxVoiceActivityDetectorPop`.
typedef SherpaOnnxVoiceActivityDetectorPopNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorPop`.
typedef SherpaOnnxVoiceActivityDetectorPop = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// C signature of `SherpaOnnxVoiceActivityDetectorClear`.
typedef SherpaOnnxVoiceActivityDetectorClearNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorClear`.
typedef SherpaOnnxVoiceActivityDetectorClear = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// C signature of `SherpaOnnxVoiceActivityDetectorReset`.
typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorReset`.
typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
    Pointer<SherpaOnnxVoiceActivityDetector>);

/// C signature of `SherpaOnnxVoiceActivityDetectorFront`: returns a pointer
/// to a speech segment.
typedef SherpaOnnxVoiceActivityDetectorFrontNative
    = Pointer<SherpaOnnxSpeechSegment> Function(
        Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart signature of `SherpaOnnxVoiceActivityDetectorFront`.
///
/// Only pointers are involved, so the native and Dart function types
/// coincide and a plain alias is enough.
typedef SherpaOnnxVoiceActivityDetectorFront
    = SherpaOnnxVoiceActivityDetectorFrontNative;

/// C signature of `SherpaOnnxDestroySpeechSegment`.
typedef SherpaOnnxDestroySpeechSegmentNative = Void Function(
    Pointer<SherpaOnnxSpeechSegment>);

/// Dart signature of `SherpaOnnxDestroySpeechSegment`.
typedef SherpaOnnxDestroySpeechSegment = void Function(
    Pointer<SherpaOnnxSpeechSegment>);
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Circular buffer.
//
// Every native symbol gets a pair of typedefs: `...Native` spells the C
// signature with dart:ffi native types, and the un-suffixed twin is the Dart
// function type that the looked-up symbol is converted to.
// ---------------------------------------------------------------------------

/// C signature of `SherpaOnnxCreateCircularBuffer`: takes a 32-bit integer
/// and returns a buffer handle.
typedef SherpaOnnxCreateCircularBufferNative = Pointer<SherpaOnnxCircularBuffer>
    Function(Int32);

/// Dart signature of `SherpaOnnxCreateCircularBuffer`.
typedef SherpaOnnxCreateCircularBuffer = Pointer<SherpaOnnxCircularBuffer>
    Function(int);

/// C signature of `SherpaOnnxDestroyCircularBuffer`.
typedef SherpaOnnxDestroyCircularBufferNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// Dart signature of `SherpaOnnxDestroyCircularBuffer`.
typedef SherpaOnnxDestroyCircularBuffer = void Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// C signature of `SherpaOnnxCircularBufferPush`: buffer handle, pointer to
/// float samples, and a 32-bit sample count.
typedef SherpaOnnxCircularBufferPushNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, Int32);

/// Dart signature of `SherpaOnnxCircularBufferPush`.
typedef SherpaOnnxCircularBufferPush = void Function(
    Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, int);

/// C signature of `SherpaOnnxCircularBufferGet`: buffer handle and two
/// 32-bit integers (the Dart wrapper passes a start index and a count);
/// returns a pointer to floats that the wrapper releases with
/// `SherpaOnnxCircularBufferFree`.
typedef SherpaOnnxCircularBufferGetNative = Pointer<Float> Function(
    Pointer<SherpaOnnxCircularBuffer>, Int32, Int32);

/// Dart signature of `SherpaOnnxCircularBufferGet`.
typedef SherpaOnnxCircularBufferGet = Pointer<Float> Function(
    Pointer<SherpaOnnxCircularBuffer>, int, int);

/// C signature of `SherpaOnnxCircularBufferFree`.
typedef SherpaOnnxCircularBufferFreeNative = Void Function(Pointer<Float>);

/// Dart signature of `SherpaOnnxCircularBufferFree`.
typedef SherpaOnnxCircularBufferFree = void Function(Pointer<Float>);

/// C signature of `SherpaOnnxCircularBufferPop`: buffer handle and a 32-bit
/// count.
typedef SherpaOnnxCircularBufferPopNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>, Int32);

/// Dart signature of `SherpaOnnxCircularBufferPop`.
typedef SherpaOnnxCircularBufferPop = void Function(
    Pointer<SherpaOnnxCircularBuffer>, int);

/// C signature of `SherpaOnnxCircularBufferSize`.
typedef SherpaOnnxCircularBufferSizeNative = Int32 Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// Dart signature of `SherpaOnnxCircularBufferSize`.
typedef SherpaOnnxCircularBufferSize = int Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// C signature of `SherpaOnnxCircularBufferHead`.
typedef SherpaOnnxCircularBufferHeadNative = Int32 Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// Dart signature of `SherpaOnnxCircularBufferHead`.
typedef SherpaOnnxCircularBufferHead = int Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// C signature of `SherpaOnnxCircularBufferReset`.
typedef SherpaOnnxCircularBufferResetNative = Void Function(
    Pointer<SherpaOnnxCircularBuffer>);

/// Dart signature of `SherpaOnnxCircularBufferReset`.
typedef SherpaOnnxCircularBufferReset = void Function(
    Pointer<SherpaOnnxCircularBuffer>);
|
||||||
|
|
||||||
typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative
|
typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative
|
||||||
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32 dim);
|
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32);
|
||||||
|
|
||||||
typedef SherpaOnnxCreateSpeakerEmbeddingManager
|
typedef SherpaOnnxCreateSpeakerEmbeddingManager
|
||||||
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int dim);
|
= Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int);
|
||||||
|
|
||||||
typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function(
|
typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative = Void Function(
|
||||||
Pointer<SherpaOnnxSpeakerEmbeddingManager>);
|
Pointer<SherpaOnnxSpeakerEmbeddingManager>);
|
||||||
@@ -190,6 +350,45 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
|
|||||||
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
|
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
|
||||||
|
|
||||||
class SherpaOnnxBindings {
|
class SherpaOnnxBindings {
|
||||||
|
static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector;
|
||||||
|
|
||||||
|
static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorAcceptWaveform?
|
||||||
|
voiceActivityDetectorAcceptWaveform;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorEmpty? voiceActivityDetectorEmpty;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorDetected? voiceActivityDetectorDetected;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorPop? voiceActivityDetectorPop;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorClear? voiceActivityDetectorClear;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorFront? voiceActivityDetectorFront;
|
||||||
|
|
||||||
|
static SherpaOnnxDestroySpeechSegment? destroySpeechSegment;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
|
||||||
|
|
||||||
|
static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
|
||||||
|
|
||||||
|
static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferPush? circularBufferPush;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferGet? circularBufferGet;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferFree? circularBufferFree;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferPop? circularBufferPop;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferSize? circularBufferSize;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferHead? circularBufferHead;
|
||||||
|
|
||||||
|
static SherpaOnnxCircularBufferReset? circularBufferReset;
|
||||||
|
|
||||||
static SherpaOnnxCreateSpeakerEmbeddingExtractor?
|
static SherpaOnnxCreateSpeakerEmbeddingExtractor?
|
||||||
createSpeakerEmbeddingExtractor;
|
createSpeakerEmbeddingExtractor;
|
||||||
|
|
||||||
@@ -252,8 +451,107 @@ class SherpaOnnxBindings {
|
|||||||
static SherpaOnnxFreeWave? freeWave;
|
static SherpaOnnxFreeWave? freeWave;
|
||||||
|
|
||||||
static void init(DynamicLibrary dynamicLibrary) {
|
static void init(DynamicLibrary dynamicLibrary) {
|
||||||
|
createVoiceActivityDetector ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>(
|
||||||
|
'SherpaOnnxCreateVoiceActivityDetector')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
destroyVoiceActivityDetector ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxDestroyVoiceActivityDetectorNative>>(
|
||||||
|
'SherpaOnnxDestroyVoiceActivityDetector')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorAcceptWaveform ??= dynamicLibrary
|
||||||
|
.lookup<
|
||||||
|
NativeFunction<
|
||||||
|
SherpaOnnxVoiceActivityDetectorAcceptWaveformNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorAcceptWaveform')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorEmpty ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorEmptyNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorEmpty')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorDetected ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorDetectedNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorDetected')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorPop ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorPopNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorPop')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorClear ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorClearNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorClear')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorFront ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFrontNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorFront')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
destroySpeechSegment ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxDestroySpeechSegmentNative>>(
|
||||||
|
'SherpaOnnxDestroySpeechSegment')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorReset ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorResetNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorReset')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
createCircularBuffer ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
|
||||||
|
'SherpaOnnxCreateCircularBuffer')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
destroyCircularBuffer ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxDestroyCircularBufferNative>>(
|
||||||
|
'SherpaOnnxDestroyCircularBuffer')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferPush ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferPushNative>>(
|
||||||
|
'SherpaOnnxCircularBufferPush')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferGet ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferGetNative>>(
|
||||||
|
'SherpaOnnxCircularBufferGet')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferFree ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferFreeNative>>(
|
||||||
|
'SherpaOnnxCircularBufferFree')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferPop ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferPopNative>>(
|
||||||
|
'SherpaOnnxCircularBufferPop')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferSize ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferSizeNative>>(
|
||||||
|
'SherpaOnnxCircularBufferSize')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferHead ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferHeadNative>>(
|
||||||
|
'SherpaOnnxCircularBufferHead')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
|
circularBufferReset ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxCircularBufferResetNative>>(
|
||||||
|
'SherpaOnnxCircularBufferReset')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
createSpeakerEmbeddingExtractor ??= dynamicLibrary
|
createSpeakerEmbeddingExtractor ??= dynamicLibrary
|
||||||
.lookup<NativeFunction<SherpaOnnxCreateSpeakerEmbeddingExtractor>>(
|
.lookup<
|
||||||
|
NativeFunction<
|
||||||
|
SherpaOnnxCreateSpeakerEmbeddingExtractorNative>>(
|
||||||
'SherpaOnnxCreateSpeakerEmbeddingExtractor')
|
'SherpaOnnxCreateSpeakerEmbeddingExtractor')
|
||||||
.asFunction();
|
.asFunction();
|
||||||
|
|
||||||
|
|||||||
@@ -2,19 +2,20 @@
|
|||||||
import 'dart:ffi';
|
import 'dart:ffi';
|
||||||
import 'dart:typed_data';
|
import 'dart:typed_data';
|
||||||
import 'package:ffi/ffi.dart';
|
import 'package:ffi/ffi.dart';
|
||||||
import "./sherpa_onnx_bindings.dart";
|
|
||||||
import "./online_stream.dart";
|
import './online_stream.dart';
|
||||||
|
import './sherpa_onnx_bindings.dart';
|
||||||
|
|
||||||
class SpeakerEmbeddingExtractorConfig {
|
class SpeakerEmbeddingExtractorConfig {
|
||||||
const SpeakerEmbeddingExtractorConfig(
|
const SpeakerEmbeddingExtractorConfig(
|
||||||
{required this.model,
|
{required this.model,
|
||||||
this.numThreads = 1,
|
this.numThreads = 1,
|
||||||
this.debug = true,
|
this.debug = true,
|
||||||
this.provider = "cpu"});
|
this.provider = 'cpu'});
|
||||||
|
|
||||||
@override
|
@override
|
||||||
String toString() {
|
String toString() {
|
||||||
return "SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)";
|
return 'SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)';
|
||||||
}
|
}
|
||||||
|
|
||||||
final String model;
|
final String model;
|
||||||
@@ -116,7 +117,7 @@ class SpeakerEmbeddingManager {
|
|||||||
|
|
||||||
/// Return true if added successfully; return false otherwise
|
/// Return true if added successfully; return false otherwise
|
||||||
bool add({required String name, required Float32List embedding}) {
|
bool add({required String name, required Float32List embedding}) {
|
||||||
assert(embedding.length == this.dim, "${embedding.length} vs ${this.dim}");
|
assert(embedding.length == this.dim, '${embedding.length} vs ${this.dim}');
|
||||||
|
|
||||||
final Pointer<Utf8> namePtr = name.toNativeUtf8();
|
final Pointer<Utf8> namePtr = name.toNativeUtf8();
|
||||||
final int n = embedding.length;
|
final int n = embedding.length;
|
||||||
@@ -145,7 +146,7 @@ class SpeakerEmbeddingManager {
|
|||||||
|
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for (final e in embeddingList) {
|
for (final e in embeddingList) {
|
||||||
assert(e.length == this.dim, "${e.length} vs ${this.dim}");
|
assert(e.length == this.dim, '${e.length} vs ${this.dim}');
|
||||||
|
|
||||||
pList.setAll(offset, e);
|
pList.setAll(offset, e);
|
||||||
offset += this.dim;
|
offset += this.dim;
|
||||||
|
|||||||
213
sherpa-onnx/flutter/lib/src/vad.dart
Normal file
213
sherpa-onnx/flutter/lib/src/vad.dart
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
import 'dart:ffi';
|
||||||
|
import 'dart:typed_data';
|
||||||
|
import 'package:ffi/ffi.dart';
|
||||||
|
|
||||||
|
import './sherpa_onnx_bindings.dart';
|
||||||
|
|
||||||
|
/// Immutable Dart-side settings for the Silero VAD model.
///
/// The values are copied field by field into the native config struct when a
/// `VoiceActivityDetector` is created.
class SileroVadModelConfig {
  /// Path of the model file. Defaults to the empty string.
  final String model;

  /// Defaults to 0.5.
  final double threshold;

  /// Defaults to 0.5.
  // NOTE(review): unit is presumably seconds - confirm against the native API.
  final double minSilenceDuration;

  /// Defaults to 0.25.
  // NOTE(review): unit is presumably seconds - confirm against the native API.
  final double minSpeechDuration;

  /// Defaults to 512.
  // NOTE(review): presumably a number of samples - confirm.
  final int windowSize;

  const SileroVadModelConfig({
    this.model = '',
    this.threshold = 0.5,
    this.minSilenceDuration = 0.5,
    this.minSpeechDuration = 0.25,
    this.windowSize = 512,
  });

  /// Returns a one-line, human-readable dump of all fields.
  @override
  String toString() => 'SileroVadModelConfig(model: $model, '
      'threshold: $threshold, '
      'minSilenceDuration: $minSilenceDuration, '
      'minSpeechDuration: $minSpeechDuration, '
      'windowSize: $windowSize)';
}
|
||||||
|
|
||||||
|
/// Immutable Dart-side configuration for a `VoiceActivityDetector`.
///
/// The values are copied field by field into the native config struct when
/// the detector is created.
class VadModelConfig {
  /// Creates a configuration.
  ///
  /// The constructor is `const` (every field is final), matching
  /// [SileroVadModelConfig]; existing non-const call sites keep working.
  const VadModelConfig(
      {this.sileroVad = const SileroVadModelConfig(),
      this.sampleRate = 16000,
      this.numThreads = 1,
      this.provider = 'cpu',
      this.debug = true});

  /// Returns a one-line, human-readable dump of all fields.
  @override
  String toString() {
    return 'VadModelConfig(sileroVad: $sileroVad, sampleRate: $sampleRate, numThreads: $numThreads, provider: $provider, debug: $debug)';
  }

  /// Silero model settings. Defaults to `SileroVadModelConfig()`.
  final SileroVadModelConfig sileroVad;

  /// Defaults to 16000.
  // NOTE(review): presumably the expected input sample rate in Hz - confirm.
  final int sampleRate;

  /// Defaults to 1.
  final int numThreads;

  /// Execution provider name. Defaults to `'cpu'`.
  final String provider;

  /// Whether the native `debug` flag is set (written as 1/0). Defaults to
  /// true.
  final bool debug;
}
|
||||||
|
|
||||||
|
/// A speech segment copied out of the native detector into Dart memory.
class SpeechSegment {
  /// The segment's samples, owned by Dart.
  final Float32List samples;

  /// The native segment's `start` value.
  // NOTE(review): presumably the index of the first sample within the audio
  // fed to the detector - confirm against the native API.
  final int start;

  SpeechSegment({required this.samples, required this.start});
}
|
||||||
|
|
||||||
|
/// Dart wrapper around a native circular buffer of float samples.
///
/// Every method forwards to the corresponding entry in [SherpaOnnxBindings].
/// If a binding has not been initialised the call is skipped and a neutral
/// value (`nullptr`, `0`, or an empty list) is used instead.
class CircularBuffer {
  CircularBuffer._({required this.ptr});

  /// Creates a native buffer with the given [capacity].
  ///
  /// The user has to invoke CircularBuffer.free() on the returned instance
  /// to avoid memory leak.
  factory CircularBuffer({required int capacity}) {
    assert(capacity > 0, 'capacity is $capacity');
    final p =
        SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr;

    return CircularBuffer._(ptr: p);
  }

  /// Destroys the native buffer and resets [ptr] to `nullptr`.
  ///
  /// Calling it again (or on a buffer that was never created) is a no-op, so
  /// the native destroy function never receives a null handle.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyCircularBuffer?.call(ptr);
    ptr = nullptr;
  }

  /// Appends a copy of [data] to the buffer.
  ///
  /// Empty input returns immediately: a zero-byte native allocation may
  /// legally yield NULL, which `calloc` from package:ffi reports by throwing.
  void push(Float32List data) {
    final n = data.length;
    if (n == 0) {
      return;
    }

    final Pointer<Float> p = calloc<Float>(n);
    try {
      // Stage the samples in native memory for the duration of the call.
      p.asTypedList(n).setAll(0, data);
      SherpaOnnxBindings.circularBufferPush?.call(ptr, p, n);
    } finally {
      calloc.free(p);
    }
  }

  /// Returns a Dart-owned copy of [n] samples starting at [startIndex].
  ///
  /// Returns an empty list when the binding is missing or the native call
  /// returns `nullptr`. The native array is always released, even if copying
  /// it throws (for example because [n] is negative).
  Float32List get({required int startIndex, required int n}) {
    final Pointer<Float> p =
        SherpaOnnxBindings.circularBufferGet?.call(ptr, startIndex, n) ??
            nullptr;

    if (p == nullptr) {
      return Float32List(0);
    }

    try {
      // fromList copies, so the result stays valid after the native free.
      return Float32List.fromList(p.asTypedList(n));
    } finally {
      SherpaOnnxBindings.circularBufferFree?.call(p);
    }
  }

  /// Forwards [n] to the native pop function.
  void pop(int n) {
    SherpaOnnxBindings.circularBufferPop?.call(ptr, n);
  }

  /// Forwards to the native reset function.
  void reset() {
    SherpaOnnxBindings.circularBufferReset?.call(ptr);
  }

  /// Value of the native size function, or 0 when the binding is missing.
  int get size => SherpaOnnxBindings.circularBufferSize?.call(ptr) ?? 0;

  /// Value of the native head function, or 0 when the binding is missing.
  int get head => SherpaOnnxBindings.circularBufferHead?.call(ptr) ?? 0;

  /// Native handle; `nullptr` after [free] or when creation was skipped.
  Pointer<SherpaOnnxCircularBuffer> ptr;
}
|
||||||
|
|
||||||
|
/// Dart wrapper around a native voice activity detector.
///
/// Every method forwards to the corresponding entry in [SherpaOnnxBindings].
/// If a binding has not been initialised the call is skipped and a neutral
/// value is used instead.
class VoiceActivityDetector {
  VoiceActivityDetector._({required this.ptr});

  /// Creates a native detector from [config].
  ///
  /// [bufferSizeInSeconds] is forwarded unchanged as the second argument of
  /// the native create function.
  ///
  /// The user has to invoke VoiceActivityDetector.free() to avoid memory leak.
  factory VoiceActivityDetector(
      {required VadModelConfig config, required double bufferSizeInSeconds}) {
    final c = calloc<SherpaOnnxVadModelConfig>();
    final modelPtr = config.sileroVad.model.toNativeUtf8();
    final providerPtr = config.provider.toNativeUtf8();

    try {
      c.ref.sileroVad.model = modelPtr;
      c.ref.sileroVad.threshold = config.sileroVad.threshold;
      c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
      c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
      c.ref.sileroVad.windowSize = config.sileroVad.windowSize;

      c.ref.sampleRate = config.sampleRate;
      c.ref.numThreads = config.numThreads;
      c.ref.provider = providerPtr;
      c.ref.debug = config.debug ? 1 : 0;

      final ptr = SherpaOnnxBindings.createVoiceActivityDetector
              ?.call(c, bufferSizeInSeconds) ??
          nullptr;

      return VoiceActivityDetector._(ptr: ptr);
    } finally {
      // The struct and its strings are only needed during the create call.
      calloc.free(providerPtr);
      calloc.free(modelPtr);
      calloc.free(c);
    }
  }

  /// Destroys the native detector and resets [ptr] to `nullptr`.
  ///
  /// Calling it again (or on a detector that was never created) is a no-op,
  /// so the native destroy function never receives a null handle.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyVoiceActivityDetector?.call(ptr);
    ptr = nullptr;
  }

  /// Feeds a copy of [samples] to the detector.
  ///
  /// Empty input returns immediately: a zero-byte native allocation may
  /// legally yield NULL, which `calloc` from package:ffi reports by throwing.
  void acceptWaveform(Float32List samples) {
    final n = samples.length;
    if (n == 0) {
      return;
    }

    final Pointer<Float> p = calloc<Float>(n);
    try {
      // Stage the samples in native memory for the duration of the call.
      p.asTypedList(n).setAll(0, samples);
      SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform
          ?.call(ptr, p, n);
    } finally {
      calloc.free(p);
    }
  }

  /// Whether the native "empty" query returns 1.
  ///
  /// When the binding is missing the detector is reported as empty, so that a
  /// `while (!vad.isEmpty())` drain loop terminates instead of spinning on
  /// the placeholder segments [front] returns in that situation.
  bool isEmpty() {
    final int empty =
        SherpaOnnxBindings.voiceActivityDetectorEmpty?.call(ptr) ?? 1;

    return empty == 1;
  }

  /// Whether the native "detected" query returns 1; false when the binding
  /// is missing.
  bool isDetected() {
    final int detected =
        SherpaOnnxBindings.voiceActivityDetectorDetected?.call(ptr) ?? 0;

    return detected == 1;
  }

  /// Forwards to the native pop function.
  void pop() {
    SherpaOnnxBindings.voiceActivityDetectorPop?.call(ptr);
  }

  /// Forwards to the native clear function.
  void clear() {
    SherpaOnnxBindings.voiceActivityDetectorClear?.call(ptr);
  }

  /// Returns a Dart-owned copy of the segment the native "front" call yields.
  ///
  /// Returns a segment with no samples and `start == 0` when the binding is
  /// missing or the native call returns `nullptr`. The native segment is
  /// always destroyed, even if copying it throws.
  // NOTE(review): what the native function does when the detector holds no
  // segment is not visible here - callers should check isEmpty() first.
  SpeechSegment front() {
    final Pointer<SherpaOnnxSpeechSegment> segment =
        SherpaOnnxBindings.voiceActivityDetectorFront?.call(ptr) ?? nullptr;
    if (segment == nullptr) {
      return SpeechSegment(samples: Float32List(0), start: 0);
    }

    try {
      final start = segment.ref.start;

      // fromList copies, so the result stays valid after the native destroy.
      final samples =
          Float32List.fromList(segment.ref.samples.asTypedList(segment.ref.n));

      return SpeechSegment(samples: samples, start: start);
    } finally {
      SherpaOnnxBindings.destroySpeechSegment?.call(segment);
    }
  }

  /// Forwards to the native reset function.
  void reset() {
    SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
  }

  /// Native handle; `nullptr` after [free] or when creation was skipped.
  Pointer<SherpaOnnxVoiceActivityDetector> ptr;
}
|
||||||
@@ -2,7 +2,8 @@
|
|||||||
import 'dart:ffi';
|
import 'dart:ffi';
|
||||||
import 'dart:typed_data';
|
import 'dart:typed_data';
|
||||||
import 'package:ffi/ffi.dart';
|
import 'package:ffi/ffi.dart';
|
||||||
import "./sherpa_onnx_bindings.dart";
|
|
||||||
|
import './sherpa_onnx_bindings.dart';
|
||||||
|
|
||||||
class WaveData {
|
class WaveData {
|
||||||
WaveData({required this.samples, required this.sampleRate});
|
WaveData({required this.samples, required this.sampleRate});
|
||||||
|
|||||||
Reference in New Issue
Block a user