This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex-mr_series-sherpa-onnx/flutter/sherpa_onnx/lib/src/vad.dart
2025-02-25 14:43:48 +08:00

266 lines
7.4 KiB
Dart

// Copyright (c) 2024 Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';
import './sherpa_onnx_bindings.dart';
/// Immutable settings for the Silero VAD model.
///
/// The values are copied field-by-field into the native configuration when a
/// detector is created; this class itself performs no validation.
class SileroVadModelConfig {
  /// Creates a configuration. Every argument is optional and falls back to
  /// the default shown in the signature.
  const SileroVadModelConfig({
    this.model = '',
    this.threshold = 0.5,
    this.minSilenceDuration = 0.5,
    this.minSpeechDuration = 0.25,
    this.windowSize = 512,
    this.maxSpeechDuration = 5.0,
  });

  /// Builds a configuration from a decoded JSON map.
  ///
  /// Missing or `null` entries fall back to the constructor defaults.
  /// Numeric entries are accepted as any [num]: JSON decoders may yield a
  /// `double` for a value such as `512.0`, and a direct `as int?` cast would
  /// throw on it. A fractional `windowSize` is truncated by [num.toInt].
  factory SileroVadModelConfig.fromJson(Map<String, dynamic> json) {
    return SileroVadModelConfig(
      model: json['model'] as String? ?? '',
      threshold: (json['threshold'] as num?)?.toDouble() ?? 0.5,
      minSilenceDuration:
          (json['minSilenceDuration'] as num?)?.toDouble() ?? 0.5,
      minSpeechDuration:
          (json['minSpeechDuration'] as num?)?.toDouble() ?? 0.25,
      windowSize: (json['windowSize'] as num?)?.toInt() ?? 512,
      maxSpeechDuration:
          (json['maxSpeechDuration'] as num?)?.toDouble() ?? 5.0,
    );
  }

  @override
  String toString() {
    // Adjacent literals concatenate into the same single-line string.
    return 'SileroVadModelConfig(model: $model, threshold: $threshold, '
        'minSilenceDuration: $minSilenceDuration, '
        'minSpeechDuration: $minSpeechDuration, windowSize: $windowSize, '
        'maxSpeechDuration: $maxSpeechDuration)';
  }

  /// Serializes this configuration using the same keys [fromJson] reads.
  Map<String, dynamic> toJson() => {
        'model': model,
        'threshold': threshold,
        'minSilenceDuration': minSilenceDuration,
        'minSpeechDuration': minSpeechDuration,
        'windowSize': windowSize,
        'maxSpeechDuration': maxSpeechDuration,
      };

  /// Model identifier handed to the native side as a UTF-8 string
  /// (presumably a path to the model file -- TODO confirm).
  final String model;

  /// Detection threshold passed through to the native model.
  final double threshold;

  /// Minimum silence duration (presumably in seconds -- TODO confirm).
  final double minSilenceDuration;

  /// Minimum speech duration (presumably in seconds -- TODO confirm).
  final double minSpeechDuration;

  /// Window size passed through to the native model
  /// (presumably a sample count -- TODO confirm).
  final int windowSize;

  /// Maximum speech duration (presumably in seconds -- TODO confirm).
  final double maxSpeechDuration;
}
/// Top-level VAD configuration: the Silero model settings plus runtime
/// options that are copied into the native configuration when a detector is
/// created.
class VadModelConfig {
  /// Creates a configuration. Every argument is optional and falls back to
  /// the default shown in the signature.
  VadModelConfig({
    this.sileroVad = const SileroVadModelConfig(),
    this.sampleRate = 16000,
    this.numThreads = 1,
    this.provider = 'cpu',
    this.debug = true,
  });

  /// Builds a configuration from a decoded JSON map.
  ///
  /// Missing or `null` entries fall back to the constructor defaults.
  /// Integer entries are read through [num] so that a value decoded as a
  /// `double` (for example `16000.0`) is accepted instead of failing an
  /// `as int?` cast; fractional values are truncated by [num.toInt].
  factory VadModelConfig.fromJson(Map<String, dynamic> json) {
    return VadModelConfig(
      sileroVad: SileroVadModelConfig.fromJson(
          json['sileroVad'] as Map<String, dynamic>? ?? const {}),
      sampleRate: (json['sampleRate'] as num?)?.toInt() ?? 16000,
      numThreads: (json['numThreads'] as num?)?.toInt() ?? 1,
      provider: json['provider'] as String? ?? 'cpu',
      debug: json['debug'] as bool? ?? true,
    );
  }

  /// Settings for the Silero VAD model.
  final SileroVadModelConfig sileroVad;

  /// Sample rate passed to the native side (presumably Hz -- TODO confirm).
  final int sampleRate;

  /// Thread count passed to the native side.
  final int numThreads;

  /// Execution provider name passed to the native side as a UTF-8 string.
  final String provider;

  /// Whether the native `debug` flag is set (sent as 1 when true, else 0).
  final bool debug;

  /// Serializes this configuration using the same keys [fromJson] reads.
  Map<String, dynamic> toJson() => {
        'sileroVad': sileroVad.toJson(),
        'sampleRate': sampleRate,
        'numThreads': numThreads,
        'provider': provider,
        'debug': debug,
      };

  @override
  String toString() {
    // Adjacent literals concatenate into the same single-line string.
    return 'VadModelConfig(sileroVad: $sileroVad, sampleRate: $sampleRate, '
        'numThreads: $numThreads, provider: $provider, debug: $debug)';
  }
}
/// A block of audio samples paired with its start position.
class SpeechSegment {
  /// The audio samples belonging to this segment.
  final Float32List samples;

  /// Start position of the segment
  /// (presumably a sample index in the input stream -- TODO confirm).
  final int start;

  /// Creates a segment holding [samples] that begins at [start].
  SpeechSegment({required this.samples, required this.start});
}
/// Dart wrapper around a native circular buffer of floats.
///
/// The wrapper holds a raw native pointer in [ptr]; call [free] once the
/// buffer is no longer needed. Every method forwards to the corresponding
/// entry in [SherpaOnnxBindings] and silently does nothing (or returns a
/// neutral value) when that binding is not available.
class CircularBuffer {
  /// Wraps an existing native buffer pointer.
  CircularBuffer.fromPtr({required this.ptr});

  CircularBuffer._({required this.ptr});

  /// Creates a native buffer with the given [capacity].
  ///
  /// The user has to invoke [free] on the returned instance to avoid a
  /// memory leak. If the native binding is unavailable, [ptr] is [nullptr].
  factory CircularBuffer({required int capacity}) {
    assert(capacity > 0, 'capacity is $capacity');
    final p =
        SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr;
    return CircularBuffer._(ptr: p);
  }

  /// Destroys the native buffer and resets [ptr] to [nullptr].
  ///
  /// Calling this again afterwards is a no-op, so a null pointer is never
  /// handed to the native destructor.
  void free() {
    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyCircularBuffer?.call(ptr);
    ptr = nullptr;
  }

  /// Copies [data] into temporary native memory and pushes it to the buffer.
  void push(Float32List data) {
    final n = data.length;
    final Pointer<Float> p = calloc<Float>(n);
    try {
      p.asTypedList(n).setAll(0, data);
      SherpaOnnxBindings.circularBufferPush?.call(ptr, p, n);
    } finally {
      // Release the scratch copy even if something above throws.
      calloc.free(p);
    }
  }

  /// Returns a Dart-owned copy of [n] values starting at [startIndex].
  ///
  /// Returns an empty list when the native call yields a null pointer or the
  /// binding is unavailable.
  Float32List get({required int startIndex, required int n}) {
    final Pointer<Float> p =
        SherpaOnnxBindings.circularBufferGet?.call(ptr, startIndex, n) ??
            nullptr;
    if (p == nullptr) {
      return Float32List(0);
    }
    try {
      // fromList copies, so the result stays valid after the native array
      // is released below.
      return Float32List.fromList(p.asTypedList(n));
    } finally {
      SherpaOnnxBindings.circularBufferFree?.call(p);
    }
  }

  /// Forwards to the native pop call with [n]
  /// (presumably drops n elements from the front -- TODO confirm).
  void pop(int n) {
    SherpaOnnxBindings.circularBufferPop?.call(ptr, n);
  }

  /// Forwards to the native reset call.
  void reset() {
    SherpaOnnxBindings.circularBufferReset?.call(ptr);
  }

  /// Size reported by the native buffer, or 0 if the binding is unavailable.
  int get size => SherpaOnnxBindings.circularBufferSize?.call(ptr) ?? 0;

  /// Head reported by the native buffer, or 0 if the binding is unavailable.
  int get head => SherpaOnnxBindings.circularBufferHead?.call(ptr) ?? 0;

  /// Raw pointer to the native buffer; [nullptr] after [free].
  Pointer<SherpaOnnxCircularBuffer> ptr;
}
/// Dart wrapper around a native voice activity detector.
///
/// The wrapper holds a raw native pointer in [ptr]; call [free] once the
/// detector is no longer needed. Every method forwards to the corresponding
/// entry in [SherpaOnnxBindings] and silently does nothing (or returns a
/// neutral value) when that binding is not available.
class VoiceActivityDetector {
  /// Wraps an existing native detector pointer together with its [config].
  VoiceActivityDetector.fromPtr({required this.ptr, required this.config});

  VoiceActivityDetector._({required this.ptr, required this.config});

  /// Creates a native detector from [config].
  ///
  /// [bufferSizeInSeconds] is passed to the native constructor unchanged.
  /// The user has to invoke [free] to avoid a memory leak. If the native
  /// binding is unavailable, [ptr] is [nullptr].
  factory VoiceActivityDetector(
      {required VadModelConfig config, required double bufferSizeInSeconds}) {
    final c = calloc<SherpaOnnxVadModelConfig>();
    // Allocate the strings with the same allocator that frees them below;
    // toNativeUtf8 would otherwise use its own default allocator.
    final modelPtr = config.sileroVad.model.toNativeUtf8(allocator: calloc);
    final providerPtr = config.provider.toNativeUtf8(allocator: calloc);

    try {
      c.ref.sileroVad.model = modelPtr;
      c.ref.sileroVad.threshold = config.sileroVad.threshold;
      c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
      c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
      c.ref.sileroVad.windowSize = config.sileroVad.windowSize;
      c.ref.sileroVad.maxSpeechDuration = config.sileroVad.maxSpeechDuration;

      c.ref.sampleRate = config.sampleRate;
      c.ref.numThreads = config.numThreads;
      c.ref.provider = providerPtr;
      c.ref.debug = config.debug ? 1 : 0;

      final ptr = SherpaOnnxBindings.createVoiceActivityDetector
              ?.call(c, bufferSizeInSeconds) ??
          nullptr;

      return VoiceActivityDetector._(ptr: ptr, config: config);
    } finally {
      // The temporary native config is released on every exit path.
      calloc.free(providerPtr);
      calloc.free(modelPtr);
      calloc.free(c);
    }
  }

  /// Destroys the native detector and resets [ptr] to [nullptr].
  ///
  /// Calling this again afterwards is a no-op, so a null pointer is never
  /// handed to the native destructor.
  void free() {
    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyVoiceActivityDetector?.call(ptr);
    ptr = nullptr;
  }

  /// Copies [samples] into temporary native memory and feeds them to the
  /// detector.
  void acceptWaveform(Float32List samples) {
    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);
    try {
      p.asTypedList(n).setAll(0, samples);
      SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform?.call(ptr, p, n);
    } finally {
      // Release the scratch copy even if something above throws.
      calloc.free(p);
    }
  }

  /// Whether the native "empty" query returns 1.
  ///
  /// Returns false when the binding is unavailable.
  bool isEmpty() {
    final int empty =
        SherpaOnnxBindings.voiceActivityDetectorEmpty?.call(ptr) ?? 0;
    return empty == 1;
  }

  /// Whether the native "detected" query returns 1.
  ///
  /// Returns false when the binding is unavailable.
  bool isDetected() {
    final int detected =
        SherpaOnnxBindings.voiceActivityDetectorDetected?.call(ptr) ?? 0;
    return detected == 1;
  }

  /// Forwards to the native pop call
  /// (presumably discards the segment [front] would return -- TODO confirm).
  void pop() {
    SherpaOnnxBindings.voiceActivityDetectorPop?.call(ptr);
  }

  /// Forwards to the native clear call.
  void clear() {
    SherpaOnnxBindings.voiceActivityDetectorClear?.call(ptr);
  }

  /// Returns a Dart-owned copy of the segment reported by the native
  /// "front" call.
  ///
  /// Returns an empty segment with `start == 0` when the native call yields
  /// a null pointer or the binding is unavailable.
  SpeechSegment front() {
    final Pointer<SherpaOnnxSpeechSegment> segment =
        SherpaOnnxBindings.voiceActivityDetectorFront?.call(ptr) ?? nullptr;
    if (segment == nullptr) {
      return SpeechSegment(samples: Float32List(0), start: 0);
    }

    try {
      final sampleList = segment.ref.samples.asTypedList(segment.ref.n);
      final start = segment.ref.start;
      // fromList copies, so the samples stay valid after the native segment
      // is destroyed below.
      final samples = Float32List.fromList(sampleList);
      return SpeechSegment(samples: samples, start: start);
    } finally {
      SherpaOnnxBindings.destroySpeechSegment?.call(segment);
    }
  }

  /// Forwards to the native reset call.
  void reset() {
    SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
  }

  /// Forwards to the native flush call.
  void flush() {
    SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);
  }

  /// Raw pointer to the native detector; [nullptr] after [free].
  Pointer<SherpaOnnxVoiceActivityDetector> ptr;

  /// The configuration this detector was created with.
  final VadModelConfig config;
}