Dart API for speaker diarization (#1418)
This commit is contained in:
5
.github/scripts/test-dart.sh
vendored
5
.github/scripts/test-dart.sh
vendored
@@ -4,6 +4,11 @@ set -ex
|
||||
|
||||
cd dart-api-examples
|
||||
|
||||
pushd speaker-diarization
|
||||
echo '----------speaker diarization----------'
|
||||
./run.sh
|
||||
popd
|
||||
|
||||
pushd speaker-identification
|
||||
echo '----------3d speaker----------'
|
||||
./run-3d-speaker.sh
|
||||
|
||||
1
.github/workflows/test-dart.yaml
vendored
1
.github/workflows/test-dart.yaml
vendored
@@ -114,6 +114,7 @@ jobs:
|
||||
cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
|
||||
cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
|
||||
cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
|
||||
cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
|
||||
|
||||
cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
|
||||
|
||||
| Directory | Description |
|
||||
|-----------|-------------|
|
||||
| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
|
||||
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
|
||||
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
|
||||
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting|
|
||||
|
||||
3
dart-api-examples/speaker-diarization/.gitignore
vendored
Normal file
3
dart-api-examples/speaker-diarization/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# https://dart.dev/guides/libraries/private-files
|
||||
# Created by `dart pub`
|
||||
.dart_tool/
|
||||
3
dart-api-examples/speaker-diarization/CHANGELOG.md
Normal file
3
dart-api-examples/speaker-diarization/CHANGELOG.md
Normal file
@@ -0,0 +1,3 @@
|
||||
## 1.0.0
|
||||
|
||||
- Initial version.
|
||||
7
dart-api-examples/speaker-diarization/README.md
Normal file
7
dart-api-examples/speaker-diarization/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Introduction
|
||||
|
||||
This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
|
||||
|
||||
# Usage
|
||||
|
||||
Please see [./run.sh](./run.sh)
|
||||
30
dart-api-examples/speaker-diarization/analysis_options.yaml
Normal file
30
dart-api-examples/speaker-diarization/analysis_options.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
# This file configures the static analysis results for your project (errors,
|
||||
# warnings, and lints).
|
||||
#
|
||||
# This enables the 'recommended' set of lints from `package:lints`.
|
||||
# This set helps identify many issues that may lead to problems when running
|
||||
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
|
||||
# style and format.
|
||||
#
|
||||
# If you want a smaller set of lints you can change this to specify
|
||||
# 'package:lints/core.yaml'. These are just the most critical lints
|
||||
# (the recommended set includes the core lints).
|
||||
# The core lints are also what is used by pub.dev for scoring packages.
|
||||
|
||||
include: package:lints/recommended.yaml
|
||||
|
||||
# Uncomment the following section to specify additional rules.
|
||||
|
||||
# linter:
|
||||
# rules:
|
||||
# - camel_case_types
|
||||
|
||||
# analyzer:
|
||||
# exclude:
|
||||
# - path/to/excluded/files/**
|
||||
|
||||
# For more information about the core and recommended set of lints, see
|
||||
# https://dart.dev/go/core-lints
|
||||
|
||||
# For additional information about configuring this file, see
|
||||
# https://dart.dev/guides/language/analysis-options
|
||||
1
dart-api-examples/speaker-diarization/bin/init.dart
Symbolic link
1
dart-api-examples/speaker-diarization/bin/init.dart
Symbolic link
@@ -0,0 +1 @@
|
||||
../../vad/bin/init.dart
|
||||
@@ -0,0 +1,100 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:io';
|
||||
import 'dart:typed_data';
|
||||
import 'dart:ffi';
|
||||
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
import './init.dart';
|
||||
|
||||
/// Runs offline speaker diarization on `./0-four-speakers-zh.wav` and prints
/// one line per detected segment in the form
/// `<start> -- <end> speaker_<id>`.
///
/// [arguments] is accepted for the usual `main` signature but is not used.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  /* Please use the following commands to download files used in this file
  Step 1: Download a speaker segmentation model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

  Step 2: Download a speaker embedding extractor model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

  Step 3: Download test wave files

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available test wave files. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

  Step 4: Run it
  */

  final segmentationModel =
      "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";

  final embeddingModel =
      "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

  final waveFilename = "./0-four-speakers-zh.wav";

  final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
    pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
        model: segmentationModel),
  );

  final embeddingConfig =
      sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);

  // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
  // numClusters to 4. If you don't know the exact number, please set it to -1.
  // In that case, you have to set threshold. A larger threshold leads to
  // fewer clusters, i.e., fewer speakers.
  final clusteringConfig =
      sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);

  final config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: segmentationConfig,
      embedding: embeddingConfig,
      clustering: clusteringConfig,
      minDurationOn: 0.2,
      minDurationOff: 0.5);

  final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
  if (sd.ptr == nullptr) {
    // Nothing native was created, so there is nothing to release.
    return;
  }

  // The wrapper owns a native object; the library requires the caller to
  // release it with free(). try/finally covers the early return below too.
  try {
    final waveData = sherpa_onnx.readWave(waveFilename);
    if (sd.sampleRate != waveData.sampleRate) {
      print(
          'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
      return;
    }

    print('started');

    // Use the following statement if you don't want to use a callback
    // final segments = sd.process(samples: waveData.samples);

    final segments = sd.processWithCallback(
        samples: waveData.samples,
        callback: (int numProcessedChunk, int numTotalChunks) {
          final progress = 100.0 * numProcessedChunk / numTotalChunks;

          print('Progress ${progress.toStringAsFixed(2)}%');

          return 0;
        });

    for (int i = 0; i < segments.length; ++i) {
      print(
          '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)} speaker_${segments[i].speaker}');
    }
  } finally {
    sd.free();
  }
}
|
||||
17
dart-api-examples/speaker-diarization/pubspec.yaml
Normal file
17
dart-api-examples/speaker-diarization/pubspec.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
name: speaker_diarization
|
||||
description: >
|
||||
This example demonstrates how to use the Dart API for speaker diarization.
|
||||
|
||||
version: 1.0.0
|
||||
|
||||
environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx: ^1.10.27
|
||||
# sherpa_onnx:
|
||||
# path: ../../flutter/sherpa_onnx
|
||||
path: ^1.9.0
|
||||
|
||||
dev_dependencies:
|
||||
lints: ^3.0.0
|
||||
21
dart-api-examples/speaker-diarization/run.sh
Executable file
21
dart-api-examples/speaker-diarization/run.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
#
# Runs the Dart speaker-diarization example.
# Any model or test file that is not already present in the current
# directory is downloaded first.

set -ex

dart pub get

# Speaker segmentation model; distributed as a .tar.bz2 archive.
seg_name=sherpa-onnx-pyannote-segmentation-3-0
if [[ ! -f ./$seg_name/model.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$seg_name.tar.bz2
  tar xvf $seg_name.tar.bz2
  rm $seg_name.tar.bz2
fi

# Speaker embedding extractor model; a single .onnx file.
embedding_model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
if [[ ! -f ./$embedding_model ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$embedding_model
fi

# Test recording used by bin/speaker-diarization.dart.
test_wave=0-four-speakers-zh.wav
if [[ ! -f ./$test_wave ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$test_wave
fi

dart run ./bin/speaker-diarization.dart
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
| Functions | URL | Supported Platforms|
|
||||
|---|---|---|
|
||||
|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux|
|
||||
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux|
|
||||
|Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
|
||||
|Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|
|
||||
|
||||
@@ -6,6 +6,7 @@ export 'src/audio_tagging.dart';
|
||||
export 'src/feature_config.dart';
|
||||
export 'src/keyword_spotter.dart';
|
||||
export 'src/offline_recognizer.dart';
|
||||
export 'src/offline_speaker_diarization.dart';
|
||||
export 'src/offline_stream.dart';
|
||||
export 'src/online_recognizer.dart';
|
||||
export 'src/online_stream.dart';
|
||||
|
||||
243
flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
Normal file
243
flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
Normal file
@@ -0,0 +1,243 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
import './speaker_identification.dart';
|
||||
|
||||
/// One diarization result entry: a time span and the speaker assigned to it.
class OfflineSpeakerDiarizationSegment {
  const OfflineSpeakerDiarizationSegment({
    required this.start,
    required this.end,
    required this.speaker,
  });

  /// Start of the segment (presumably in seconds -- confirm against the
  /// native API).
  final double start;

  /// End of the segment, in the same unit as [start].
  final double end;

  /// Integer label of the speaker this segment is attributed to.
  final int speaker;

  @override
  String toString() =>
      'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)';
}
|
||||
|
||||
/// Settings for a pyannote speaker segmentation model.
class OfflineSpeakerSegmentationPyannoteModelConfig {
  const OfflineSpeakerSegmentationPyannoteModelConfig({
    this.model = '',
  });

  /// Path to the segmentation model file; empty by default.
  final String model;

  @override
  String toString() =>
      'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)';
}
|
||||
|
||||
/// Settings for the speaker segmentation stage.
class OfflineSpeakerSegmentationModelConfig {
  const OfflineSpeakerSegmentationModelConfig({
    this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(),
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  /// Pyannote model settings.
  final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  /// Thread count forwarded to the native library; defaults to 1.
  final int numThreads;

  /// Forwarded to the native library as 1 (true) or 0 (false); defaults to
  /// true.
  final bool debug;

  /// Execution provider name forwarded to the native library; defaults to
  /// 'cpu'.
  final String provider;

  @override
  String toString() =>
      'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)';
}
|
||||
|
||||
/// Settings for clustering speaker embeddings into speakers.
class FastClusteringConfig {
  const FastClusteringConfig({
    this.numClusters = -1,
    this.threshold = 0.5,
  });

  /// Number of speakers when it is known in advance; -1 (the default) when
  /// it is not, in which case [threshold] has to be set.
  final int numClusters;

  /// Clustering threshold. A larger value leads to fewer clusters, i.e.,
  /// fewer speakers.
  final double threshold;

  @override
  String toString() =>
      'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)';
}
|
||||
|
||||
/// Top-level configuration for [OfflineSpeakerDiarization]: segmentation
/// model, embedding extractor, clustering, and two duration settings.
class OfflineSpeakerDiarizationConfig {
  const OfflineSpeakerDiarizationConfig({
    this.segmentation = const OfflineSpeakerSegmentationModelConfig(),
    this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''),
    this.clustering = const FastClusteringConfig(),
    this.minDurationOn = 0.2,
    this.minDurationOff = 0.5,
  });

  /// Speaker segmentation model settings.
  final OfflineSpeakerSegmentationModelConfig segmentation;

  /// Speaker embedding extractor settings.
  final SpeakerEmbeddingExtractorConfig embedding;

  /// Clustering settings.
  final FastClusteringConfig clustering;

  /// In seconds; defaults to 0.2.
  final double minDurationOn;

  /// In seconds; defaults to 0.5.
  final double minDurationOff;

  @override
  String toString() =>
      'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)';
}
|
||||
|
||||
/// Dart wrapper around the native offline speaker diarization object.
///
/// An instance owns a native handle ([ptr]). The caller must call [free]
/// when the instance is no longer needed, otherwise the native object leaks.
class OfflineSpeakerDiarization {
  OfflineSpeakerDiarization._(
      {required this.ptr, required this.config, required this.sampleRate});

  /// Destroys the native object and resets [ptr] to `nullptr`.
  ///
  /// Calling it again afterwards is a no-op.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr);
    ptr = nullptr;
  }

  /// Creates the native diarization object from [config].
  ///
  /// If creation fails (or the bindings are not initialized) the returned
  /// instance has `ptr == nullptr` and `sampleRate == 0`.
  ///
  /// The user is responsible for calling the OfflineSpeakerDiarization.free()
  /// method of the returned instance to avoid a memory leak.
  factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
    final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>();

    c.ref.segmentation.pyannote.model =
        config.segmentation.pyannote.model.toNativeUtf8();
    c.ref.segmentation.numThreads = config.segmentation.numThreads;
    c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0;
    c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8();

    c.ref.embedding.model = config.embedding.model.toNativeUtf8();
    c.ref.embedding.numThreads = config.embedding.numThreads;
    c.ref.embedding.debug = config.embedding.debug ? 1 : 0;
    c.ref.embedding.provider = config.embedding.provider.toNativeUtf8();

    c.ref.clustering.numClusters = config.clustering.numClusters;
    c.ref.clustering.threshold = config.clustering.threshold;

    c.ref.minDurationOn = config.minDurationOn;
    c.ref.minDurationOff = config.minDurationOff;

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ??
            nullptr;

    // The native side is done with the temporary config once the create call
    // returns: release the strings first, then the struct that holds them.
    calloc.free(c.ref.embedding.provider);
    calloc.free(c.ref.embedding.model);
    calloc.free(c.ref.segmentation.provider);
    calloc.free(c.ref.segmentation.pyannote.model);
    calloc.free(c);

    int sampleRate = 0;
    if (ptr != nullptr) {
      sampleRate = SherpaOnnxBindings
              .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
              ?.call(ptr) ??
          0;
    }

    return OfflineSpeakerDiarization._(
        ptr: ptr, config: config, sampleRate: sampleRate);
  }

  /// Runs diarization on [samples] and returns the segments in the order
  /// produced by the native sort-by-start-time call.
  ///
  /// Returns an empty list if this instance has no native object or the
  /// native call yields no result.
  List<OfflineSpeakerDiarizationSegment> process(
      {required Float32List samples}) {
    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;

    // Native copy of the samples; released in the finally block below.
    final Pointer<Float> p = calloc<Float>(n);

    try {
      p.asTypedList(n).setAll(0, samples);

      final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess
              ?.call(ptr, p, n) ??
          nullptr;

      final ans = _processImpl(r);

      SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
          ?.call(r);

      return ans;
    } finally {
      calloc.free(p);
    }
  }

  /// Same as [process], but [callback] is invoked by the native code with the
  /// number of processed chunks and the total number of chunks.
  ///
  /// [callback] returns an `int` that is handed back to the native code; if
  /// it throws, 0 is returned to the native code instead.
  List<OfflineSpeakerDiarizationSegment> processWithCallback({
    required Float32List samples,
    required int Function(int numProcessedChunks, int numTotalChunks) callback,
  }) {
    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;

    // Native copy of the samples; released in the finally block below.
    final Pointer<Float> p = calloc<Float>(n);

    final wrapper = NativeCallable<
            SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal(
        (int numProcessedChunks, int numTotalChunks) {
      return callback(numProcessedChunks, numTotalChunks);
    }, exceptionalReturn: 0);

    try {
      p.asTypedList(n).setAll(0, samples);

      final r = SherpaOnnxBindings
              .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
              ?.call(ptr, p, n, wrapper.nativeFunction) ??
          nullptr;

      final ans = _processImpl(r);

      SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
          ?.call(r);

      return ans;
    } finally {
      // The native function pointer is only needed during the call above.
      wrapper.close();
      calloc.free(p);
    }
  }

  /// Copies the segments of the native result [r] into Dart objects.
  ///
  /// Does not destroy [r]; the caller does that. The native segment array
  /// obtained here is destroyed before returning.
  List<OfflineSpeakerDiarizationSegment> _processImpl(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
    if (r == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final numSegments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
            ?.call(r) ??
        0;

    final segments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
            ?.call(r) ??
        nullptr;

    if (segments == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final ans = <OfflineSpeakerDiarizationSegment>[];
    for (int i = 0; i != numSegments; ++i) {
      final s = segments + i;

      ans.add(OfflineSpeakerDiarizationSegment(
          start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker));
    }

    SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment
        ?.call(segments);

    return ans;
  }

  /// Native handle; `nullptr` if creation failed or after [free].
  Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr;

  /// The configuration this instance was created with.
  OfflineSpeakerDiarizationConfig config;

  /// Sample rate reported by the native object at creation time; 0 if
  /// creation failed.
  final int sampleRate;
}
|
||||
@@ -2,6 +2,66 @@
|
||||
import 'dart:ffi';
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
/// FFI struct for the speaker embedding extractor configuration.
///
/// Field order and native types define the memory layout handed to the
/// native library; do not reorder.
final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
  /// Model path as a native UTF-8 string.
  external Pointer<Utf8> model;

  @Int32()
  external int numThreads;

  /// Written as 1 or 0 by the Dart wrappers.
  @Int32()
  external int debug;

  /// Provider name as a native UTF-8 string.
  external Pointer<Utf8> provider;
}
|
||||
|
||||
/// FFI struct for one native diarization segment: two 32-bit floats followed
/// by a 32-bit integer speaker label.
///
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct {
  @Float()
  external double start;

  @Float()
  external double end;

  @Int32()
  external int speaker;
}
|
||||
|
||||
/// FFI struct for the pyannote segmentation model configuration.
final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
    extends Struct {
  /// Model path as a native UTF-8 string.
  external Pointer<Utf8> model;
}
|
||||
|
||||
/// FFI struct for the speaker segmentation configuration.
///
/// Field order and native types define the memory layout handed to the
/// native library; do not reorder.
final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct {
  /// Embedded by value, not by pointer.
  external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  @Int32()
  external int numThreads;

  /// Written as 1 or 0 by the Dart wrapper.
  @Int32()
  external int debug;

  /// Provider name as a native UTF-8 string.
  external Pointer<Utf8> provider;
}
|
||||
|
||||
/// FFI struct for the clustering configuration: a 32-bit integer cluster
/// count followed by a 32-bit float threshold.
///
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxFastClusteringConfig extends Struct {
  @Int32()
  external int numClusters;

  @Float()
  external double threshold;
}
|
||||
|
||||
/// FFI struct for the complete offline speaker diarization configuration.
///
/// The three nested configs are embedded by value. Field order and native
/// types define the memory layout handed to the native library; do not
/// reorder.
final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct {
  external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;

  external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;

  external SherpaOnnxFastClusteringConfig clustering;

  @Float()
  external double minDurationOn;

  @Float()
  external double minDurationOff;
}
|
||||
|
||||
final class SherpaOnnxOfflinePunctuationModelConfig extends Struct {
|
||||
external Pointer<Utf8> ctTransformer;
|
||||
|
||||
@@ -341,18 +401,6 @@ final class SherpaOnnxWave extends Struct {
|
||||
external int numSamples;
|
||||
}
|
||||
|
||||
final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
|
||||
external Pointer<Utf8> model;
|
||||
|
||||
@Int32()
|
||||
external int numThreads;
|
||||
|
||||
@Int32()
|
||||
external int debug;
|
||||
|
||||
external Pointer<Utf8> provider;
|
||||
}
|
||||
|
||||
final class SherpaOnnxKeywordSpotterConfig extends Struct {
|
||||
external SherpaOnnxFeatureConfig feat;
|
||||
|
||||
@@ -402,10 +450,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
|
||||
|
||||
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
|
||||
|
||||
final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}
|
||||
|
||||
final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}
|
||||
|
||||
// Create: takes a pointer to the config struct, returns the native handle.
// The native and Dart signatures are identical (pointers only).
typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);

typedef SherpaOnnxCreateOfflineSpeakerDiarization
    = SherpaOnnxCreateOfflineSpeakerDiarizationNative;

// Destroy: takes the native handle, returns nothing.
typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
|
||||
|
||||
typedef SherpaOnnxCreateOfflinePunctuationNative
|
||||
= Pointer<SherpaOnnxOfflinePunctuation> Function(
|
||||
Pointer<SherpaOnnxOfflinePunctuationConfig>);
|
||||
|
||||
// Each native function below has a `...Native` typedef (C signature in
// dart:ffi types) followed by its Dart-side counterpart.

// GetSampleRate
typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32 Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);

// SetConfig
typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);

typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);

// ResultGetNumSpeakers
typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32
    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

// ResultGetNumSegments
typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32
    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

// ResultSortByStartTime: returns a pointer to the first segment of an array.
typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
    = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative;

// DestroySegment: releases the array returned by ResultSortByStartTime.
typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);

// Process: (handle, samples, number of samples) -> result.
typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, Int32);

typedef SherpaOnnxOfflineSpeakerDiarizationProcess
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, int);

// Progress callback without a user-data argument:
// (processed chunks, total chunks) -> int.
typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32
    Function(Int32, Int32);

// ProcessWithCallbackNoArg: like Process, plus the progress callback.
typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
        Pointer<Float>,
        Int32,
        Pointer<
            NativeFunction<
                SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);

typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
        Pointer<Float>,
        int,
        Pointer<
            NativeFunction<
                SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);

// DestroyResult: releases a result returned by one of the Process functions.
typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
|
||||
|
||||
typedef SherpaOnnxCreateOfflinePunctuation
|
||||
= SherpaOnnxCreateOfflinePunctuationNative;
|
||||
|
||||
@@ -940,6 +1079,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
|
||||
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
|
||||
|
||||
class SherpaOnnxBindings {
|
||||
static SherpaOnnxCreateOfflineSpeakerDiarization?
|
||||
sherpaOnnxCreateOfflineSpeakerDiarization;
|
||||
static SherpaOnnxDestroyOfflineSpeakerDiarization?
|
||||
sherpaOnnxDestroyOfflineSpeakerDiarization;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate?
|
||||
sherpaOnnxOfflineSpeakerDiarizationGetSampleRate;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationSetConfig?
|
||||
sherpaOnnxOfflineSpeakerDiarizationSetConfig;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers?
|
||||
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments?
|
||||
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime?
|
||||
sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationDestroySegment?
|
||||
sherpaOnnxOfflineSpeakerDiarizationDestroySegment;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationProcess?
|
||||
sherpaOnnxOfflineSpeakerDiarizationProcess;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationDestroyResult?
|
||||
sherpaOnnxOfflineSpeakerDiarizationDestroyResult;
|
||||
static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg?
|
||||
sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg;
|
||||
|
||||
static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation;
|
||||
static SherpaOnnxDestroyOfflinePunctuation?
|
||||
sherpaOnnxDestroyOfflinePunctuation;
|
||||
@@ -1107,6 +1269,83 @@ class SherpaOnnxBindings {
|
||||
static SherpaOnnxFreeWave? freeWave;
|
||||
|
||||
static void init(DynamicLibrary dynamicLibrary) {
|
||||
sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxCreateOfflineSpeakerDiarizationNative>>(
|
||||
'SherpaOnnxCreateOfflineSpeakerDiarization')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxDestroyOfflineSpeakerDiarizationNative>>(
|
||||
'SherpaOnnxDestroyOfflineSpeakerDiarization')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationSetConfig')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationDestroySegment')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationProcessNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationProcess')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>>(
|
||||
'SherpaOnnxOfflineSpeakerDiarizationDestroyResult')
|
||||
.asFunction();
|
||||
|
||||
sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>(
|
||||
'SherpaOnnxCreateOfflinePunctuation')
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
name: sherpa_onnx
|
||||
|
||||
description: >
|
||||
Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
|
||||
with onnxruntime without Internet connection.
|
||||
Speech recognition, speech synthesis, speaker diarization, and speaker recognition
|
||||
using next-gen Kaldi with onnxruntime without Internet connection.
|
||||
|
||||
repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter
|
||||
|
||||
@@ -12,7 +12,7 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/
|
||||
topics:
|
||||
- speech-recognition
|
||||
- speech-synthesis
|
||||
- speaker-identification
|
||||
- speaker-diarization
|
||||
- audio-tagging
|
||||
- voice-activity-detection
|
||||
|
||||
@@ -41,7 +41,7 @@ dependencies:
|
||||
sherpa_onnx_linux: ^1.10.27
|
||||
# sherpa_onnx_linux:
|
||||
# path: ../sherpa_onnx_linux
|
||||
#
|
||||
|
||||
sherpa_onnx_windows: ^1.10.27
|
||||
# sherpa_onnx_windows:
|
||||
# path: ../sherpa_onnx_windows
|
||||
|
||||
16
scripts/dart/speaker-diarization-pubspec.yaml
Normal file
16
scripts/dart/speaker-diarization-pubspec.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: speaker_diarization
|
||||
description: >
|
||||
This example demonstrates how to use the Dart API for speaker diarization.
|
||||
|
||||
version: 1.0.0
|
||||
|
||||
environment:
|
||||
sdk: ">=3.0.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
sherpa_onnx:
|
||||
path: ../../flutter/sherpa_onnx
|
||||
path: ^1.9.0
|
||||
|
||||
dev_dependencies:
|
||||
lints: ^3.0.0
|
||||
@@ -1828,4 +1828,20 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Runs speaker diarization on `samples` (`n` floats) and reports progress
// through `callback`, which receives only the number of processed chunks and
// the total number of chunks (no user-data pointer).
//
// `callback` may be null; in that case progress is not reported and 0 is
// handed to the implementation in place of the callback's return value.
// NOTE(review): 0 is assumed to be the neutral return value for a progress
// callback -- confirm against the implementation of Process().
//
// The returned pointer is heap-allocated; the caller has to release it with
// SherpaOnnxOfflineSpeakerDiarizationDestroyResult().
const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) {
  // Adapt the two-argument callback to the three-argument form expected by
  // Process(); the trailing user-data pointer is ignored. The lambda itself
  // is never "empty", so the null check has to happen inside it.
  auto wrapper = [callback](int32_t num_processed_chunks,
                            int32_t num_total_chunks, void *) -> int32_t {
    if (callback == nullptr) {
      return 0;
    }

    return callback(num_processed_chunks, num_total_chunks);
  };

  auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult;
  ans->impl = sd->impl->Process(samples, n, wrapper);

  return ans;
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1485,6 +1485,9 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
|
||||
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)(
|
||||
int32_t num_processed_chunk, int32_t num_total_chunks, void *arg);
|
||||
|
||||
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(
|
||||
int32_t num_processed_chunk, int32_t num_total_chunks);
|
||||
|
||||
// The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult()
|
||||
// to free the returned pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
|
||||
@@ -1500,6 +1503,12 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
|
||||
int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback,
|
||||
void *arg);
|
||||
|
||||
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
|
||||
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
|
||||
const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
|
||||
int32_t n,
|
||||
SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback);
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
|
||||
const SherpaOnnxOfflineSpeakerDiarizationResult *r);
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -204,7 +204,8 @@ Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback(
|
||||
jfloat *p = env->GetFloatArrayElements(samples, nullptr);
|
||||
jsize n = env->GetArrayLength(samples);
|
||||
auto segments =
|
||||
sd->Process(p, n, callback_wrapper, (void *)arg).SortByStartTime();
|
||||
sd->Process(p, n, callback_wrapper, reinterpret_cast<void *>(arg))
|
||||
.SortByStartTime();
|
||||
env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
|
||||
|
||||
return ProcessImpl(env, segments);
|
||||
|
||||
Reference in New Issue
Block a user