2024-05-21 20:38:52 +08:00
|
|
|
// Copyright (c) 2024 Xiaomi Corporation
|
|
|
|
|
import 'dart:ffi';
|
|
|
|
|
import 'dart:typed_data';
|
|
|
|
|
import 'package:ffi/ffi.dart';
|
2024-05-22 21:56:21 +08:00
|
|
|
|
|
|
|
|
import './online_stream.dart';
|
|
|
|
|
import './sherpa_onnx_bindings.dart';
|
2024-05-21 20:38:52 +08:00
|
|
|
|
|
|
|
|
/// Immutable settings describing how a speaker embedding extractor should be
/// set up.
class SpeakerEmbeddingExtractorConfig {
  /// Creates a configuration.
  ///
  /// Only [model] is mandatory; the remaining options fall back to
  /// `numThreads = 1`, `debug = true` and `provider = 'cpu'`.
  const SpeakerEmbeddingExtractorConfig({
    required this.model,
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  /// The model to use (presumably a path to the model file — confirm against
  /// the caller).
  final String model;

  /// Thread count setting; defaults to 1.
  final int numThreads;

  /// Debug flag; defaults to `true`.
  final bool debug;

  /// Provider name (presumably the inference backend — confirm); defaults to
  /// `'cpu'`.
  final String provider;

  /// Returns a human-readable listing of every field.
  @override
  String toString() => 'SpeakerEmbeddingExtractorConfig('
      'model: $model, '
      'numThreads: $numThreads, '
      'debug: $debug, '
      'provider: $provider)';
}
|
|
|
|
|
|
|
|
|
|
/// Dart-side handle to a native speaker embedding extractor reached through
/// [SherpaOnnxBindings].
///
/// An instance holds a native pointer in [ptr]; call [free] when the
/// extractor is no longer needed.
class SpeakerEmbeddingExtractor {
  SpeakerEmbeddingExtractor._({required this.ptr, required this.dim});

  /// Creates an extractor from [config].
  ///
  /// The user is responsible to call the SpeakerEmbeddingExtractor.free()
  /// method of the returned instance to avoid memory leak.
  ///
  /// When the native binding is not available, or the native constructor
  /// returns a null handle, the returned instance has [ptr] equal to
  /// [nullptr] and [dim] equal to 0. Callers should check [ptr] before using
  /// the instance.
  factory SpeakerEmbeddingExtractor(
      {required SpeakerEmbeddingExtractorConfig config}) {
    final c = calloc<SherpaOnnxSpeakerEmbeddingExtractorConfig>();

    // The native struct holds raw C strings, so the Dart strings are copied
    // into native memory for the duration of the create call.
    final modelPtr = config.model.toNativeUtf8();
    final providerPtr = config.provider.toNativeUtf8();

    c.ref.model = modelPtr;
    c.ref.numThreads = config.numThreads;
    c.ref.debug = config.debug ? 1 : 0;
    c.ref.provider = providerPtr;

    final ptr =
        SherpaOnnxBindings.createSpeakerEmbeddingExtractor?.call(c) ?? nullptr;

    // The temporary config is released right after the create call, exactly
    // as before.
    calloc.free(providerPtr);
    calloc.free(modelPtr);
    calloc.free(c);

    // Never ask native code for the dimension of a null handle.
    final dim = ptr == nullptr
        ? 0
        : SherpaOnnxBindings.speakerEmbeddingExtractorDim?.call(ptr) ?? 0;

    return SpeakerEmbeddingExtractor._(ptr: ptr, dim: dim);
  }

  /// Destroys the native extractor and resets [ptr] to [nullptr].
  ///
  /// Calling this again on an already released instance does nothing.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroySpeakerEmbeddingExtractor?.call(ptr);
    ptr = nullptr;
  }

  /// Creates a stream associated with this extractor.
  ///
  /// The user has to invoke stream.free() on the returned instance
  /// to avoid memory leak.
  ///
  /// The returned stream wraps [nullptr] when the native binding is not
  /// available.
  OnlineStream createStream() {
    final p =
        SherpaOnnxBindings.speakerEmbeddingExtractorCreateStream?.call(ptr) ??
            nullptr;

    return OnlineStream(ptr: p);
  }

  /// Whether the native extractor reports [stream] as ready.
  ///
  /// Returns `false` when the native binding is not available.
  bool isReady(OnlineStream stream) {
    final int ready = SherpaOnnxBindings.speakerEmbeddingExtractorIsReady
            ?.call(ptr, stream.ptr) ??
        0;

    return ready == 1;
  }

  /// Computes the embedding for [stream].
  ///
  /// Returns a Dart-owned list of [dim] floats copied from the native result,
  /// or an empty list when the native call yields no result.
  Float32List compute(OnlineStream stream) {
    final Pointer<Float> embedding = SherpaOnnxBindings
            .speakerEmbeddingExtractorComputeEmbedding
            ?.call(ptr, stream.ptr) ??
        nullptr;

    if (embedding == nullptr) {
      return Float32List(0);
    }

    // Copy before the native buffer is destroyed; the typed-list view would
    // otherwise dangle.
    final ans = Float32List.fromList(embedding.asTypedList(dim));

    SherpaOnnxBindings.speakerEmbeddingExtractorDestroyEmbedding
        ?.call(embedding);

    return ans;
  }

  /// Native handle; [nullptr] after [free] or when creation did not succeed.
  Pointer<SherpaOnnxSpeakerEmbeddingExtractor> ptr;

  /// Embedding dimension reported by the native extractor at construction
  /// time, or 0 when it could not be queried.
  final int dim;
}
|
|
|
|
|
|
|
|
|
|
/// Dart-side handle to a native speaker embedding manager reached through
/// [SherpaOnnxBindings].
///
/// Every embedding handed to this class must contain exactly [dim] values;
/// methods throw [ArgumentError] otherwise.
class SpeakerEmbeddingManager {
  SpeakerEmbeddingManager._({required this.ptr, required this.dim});

  /// Creates a manager for embeddings of dimension [dim].
  ///
  /// The user has to use SpeakerEmbeddingManager.free() to avoid memory leak.
  ///
  /// [ptr] is [nullptr] when the native binding is not available.
  factory SpeakerEmbeddingManager(int dim) {
    final p =
        SherpaOnnxBindings.createSpeakerEmbeddingManager?.call(dim) ?? nullptr;

    return SpeakerEmbeddingManager._(ptr: p, dim: dim);
  }

  /// Destroys the native manager and resets [ptr] to [nullptr].
  ///
  /// Calling this again on an already released instance does nothing.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroySpeakerEmbeddingManager?.call(ptr);
    ptr = nullptr;
  }

  /// Throws [ArgumentError] unless [embedding] has exactly [dim] values.
  ///
  /// This is a real runtime check rather than an `assert`: the native calls
  /// receive only a raw pointer, so a wrong length must be rejected in
  /// release builds too.
  void _checkDim(Float32List embedding) {
    if (embedding.length != dim) {
      throw ArgumentError('${embedding.length} vs $dim');
    }
  }

  /// Copies [embeddings] back-to-back into a newly allocated native buffer of
  /// `embeddings.length * dim` floats.
  ///
  /// Each element must already have been validated with [_checkDim]. The
  /// caller owns the returned buffer and must release it with `calloc.free`.
  Pointer<Float> _copyToNative(List<Float32List> embeddings) {
    final total = embeddings.length * dim;
    final Pointer<Float> buffer = calloc<Float>(total);
    final view = buffer.asTypedList(total);

    var offset = 0;
    for (final e in embeddings) {
      view.setAll(offset, e);
      offset += dim;
    }

    return buffer;
  }

  /// Registers [embedding] under [name].
  ///
  /// Return true if added successfully; return false otherwise.
  ///
  /// Throws [ArgumentError] if `embedding.length != dim`.
  bool add({required String name, required Float32List embedding}) {
    // Validate before allocating so nothing can leak on the error path.
    _checkDim(embedding);

    final Pointer<Utf8> namePtr = name.toNativeUtf8();
    final Pointer<Float> p = _copyToNative([embedding]);

    try {
      final int ok =
          SherpaOnnxBindings.speakerEmbeddingManagerAdd?.call(ptr, namePtr, p) ??
              0;
      return ok == 1;
    } finally {
      calloc.free(p);
      calloc.free(namePtr);
    }
  }

  /// Registers every embedding in [embeddingList] under [name].
  ///
  /// Returns true if the native call reports success; false otherwise.
  ///
  /// Throws [ArgumentError] if any element's length differs from [dim].
  bool addMulti(
      {required String name, required List<Float32List> embeddingList}) {
    // Validate every element before allocating so nothing can leak.
    embeddingList.forEach(_checkDim);

    final Pointer<Utf8> namePtr = name.toNativeUtf8();
    final int n = embeddingList.length;
    final Pointer<Float> p = _copyToNative(embeddingList);

    try {
      final int ok = SherpaOnnxBindings.speakerEmbeddingManagerAddListFlattened
              ?.call(ptr, namePtr, p, n) ??
          0;
      return ok == 1;
    } finally {
      calloc.free(p);
      calloc.free(namePtr);
    }
  }

  /// Whether the native manager reports a speaker called [name].
  bool contains(String name) {
    final Pointer<Utf8> namePtr = name.toNativeUtf8();

    try {
      final int found = SherpaOnnxBindings.speakerEmbeddingManagerContains
              ?.call(ptr, namePtr) ??
          0;
      return found == 1;
    } finally {
      calloc.free(namePtr);
    }
  }

  /// Removes the speaker called [name].
  ///
  /// Returns true if the native call reports success; false otherwise.
  bool remove(String name) {
    final Pointer<Utf8> namePtr = name.toNativeUtf8();

    try {
      final int ok =
          SherpaOnnxBindings.speakerEmbeddingManagerRemove?.call(ptr, namePtr) ??
              0;
      return ok == 1;
    } finally {
      calloc.free(namePtr);
    }
  }

  /// Looks up the speaker matching [embedding] using [threshold].
  ///
  /// Return an empty string if no speaker is found.
  ///
  /// Throws [ArgumentError] if `embedding.length != dim`.
  String search({required Float32List embedding, required double threshold}) {
    _checkDim(embedding);

    final Pointer<Float> p = _copyToNative([embedding]);

    final Pointer<Utf8> name = SherpaOnnxBindings.speakerEmbeddingManagerSearch
            ?.call(ptr, p, threshold) ??
        nullptr;

    calloc.free(p);

    if (name == nullptr) {
      return '';
    }

    try {
      return name.toDartString();
    } finally {
      // Release the native string even if decoding throws.
      SherpaOnnxBindings.speakerEmbeddingManagerFreeSearch?.call(name);
    }
  }

  /// Checks [embedding] against the speaker called [name] using [threshold].
  ///
  /// Returns true if the native call reports 1; false otherwise.
  ///
  /// Throws [ArgumentError] if `embedding.length != dim`.
  bool verify(
      {required String name,
      required Float32List embedding,
      required double threshold}) {
    _checkDim(embedding);

    final Pointer<Utf8> namePtr = name.toNativeUtf8();
    final Pointer<Float> p = _copyToNative([embedding]);

    try {
      final int ok = SherpaOnnxBindings.speakerEmbeddingManagerVerify
              ?.call(ptr, namePtr, p, threshold) ??
          0;
      return ok == 1;
    } finally {
      calloc.free(p);
      calloc.free(namePtr);
    }
  }

  /// Number of speakers reported by the native manager; 0 when the native
  /// binding is not available.
  int get numSpeakers =>
      SherpaOnnxBindings.speakerEmbeddingManagerNumSpeakers?.call(ptr) ?? 0;

  /// Names of all speakers held by the native manager.
  ///
  /// Returns an empty list when there are no speakers or the native call
  /// yields no result.
  List<String> get allSpeakerNames {
    final int n = numSpeakers;
    if (n == 0) {
      return <String>[];
    }

    final Pointer<Pointer<Utf8>> names =
        SherpaOnnxBindings.speakerEmbeddingManagerGetAllSpeakers?.call(ptr) ??
            nullptr;

    if (names == nullptr) {
      return <String>[];
    }

    try {
      // see https://api.flutter.dev/flutter/dart-ffi/PointerPointer.html
      return <String>[
        for (int i = 0; i < n; ++i) names[i].toDartString(),
      ];
    } finally {
      // Release the native array even if decoding one of the names throws.
      SherpaOnnxBindings.speakerEmbeddingManagerFreeAllSpeakers?.call(names);
    }
  }

  /// Native handle; [nullptr] after [free] or when creation did not succeed.
  Pointer<SherpaOnnxSpeakerEmbeddingManager> ptr;

  /// Number of values every embedding handled by this manager must have.
  final int dim;
}
|