Dart API for speaker diarization (#1418)

This commit is contained in:
Fangjun Kuang
2024-10-11 21:17:41 +08:00
committed by GitHub
parent 1851ff6337
commit 1ed803adc1
21 changed files with 733 additions and 17 deletions

View File

@@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
| Directory | Description |
|-----------|-------------|
| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting.|

View File

@@ -0,0 +1,3 @@
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/

View File

@@ -0,0 +1,3 @@
## 1.0.0
- Initial version.

View File

@@ -0,0 +1,7 @@
# Introduction
This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
# Usage
Please see [./run.sh](./run.sh)

View File

@@ -0,0 +1,30 @@
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.
include: package:lints/recommended.yaml
# Uncomment the following section to specify additional rules.
# linter:
# rules:
# - camel_case_types
# analyzer:
# exclude:
# - path/to/excluded/files/**
# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints
# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options

View File

@@ -0,0 +1 @@
../../vad/bin/init.dart

View File

@@ -0,0 +1,100 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'dart:ffi';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Runs offline speaker diarization on a test wave file and prints one line
/// per detected segment: `start -- end speaker_<id>`.
///
/// Download the required files before running:
///
/// Step 1: speaker segmentation model
///   See https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
///   for a list of available models. Example:
///     wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
///     tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
///     rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
///
/// Step 2: speaker embedding extractor model
///   See https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
///   (NOTE: "recongition" is the actual spelling of the upstream release tag;
///   do not "correct" it). Example:
///     wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
///
/// Step 3: test wave file
///   See https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
///   for a list of available test wave files. Example:
///     wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
void main(List<String> arguments) async {
  // Load the native sherpa-onnx library before any FFI call.
  await initSherpaOnnx();

  final segModelFile = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
  final embeddingModelFile =
      "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
  final waveFile = "./0-four-speakers-zh.wav";

  final segConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
    pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
        model: segModelFile),
  );

  final embConfig =
      sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModelFile);

  // ./0-four-speakers-zh.wav is known to contain 4 speakers, so numClusters
  // is fixed at 4. If the speaker count is unknown, set numClusters to -1
  // and rely on threshold instead: a larger threshold produces fewer
  // clusters, i.e., fewer speakers.
  final clustering =
      sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);

  final diarizationConfig = sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: segConfig,
      embedding: embConfig,
      clustering: clustering,
      minDurationOn: 0.2,
      minDurationOff: 0.5);

  final diarizer = sherpa_onnx.OfflineSpeakerDiarization(diarizationConfig);
  if (diarizer.ptr == nullptr) {
    // Native object creation failed (e.g., model files missing); bail out.
    return;
  }

  final wave = sherpa_onnx.readWave(waveFile);
  if (diarizer.sampleRate != wave.sampleRate) {
    print(
        'Expected sample rate: ${diarizer.sampleRate}, given: ${wave.sampleRate}');
    return;
  }

  print('started');

  // Use the following statement if you don't want to use a callback:
  //   final segments = diarizer.process(samples: wave.samples);
  final segments = diarizer.processWithCallback(
      samples: wave.samples,
      callback: (int numProcessedChunk, int numTotalChunks) {
        // Report progress as a percentage; returning 0 continues processing.
        final progress = 100.0 * numProcessedChunk / numTotalChunks;
        print('Progress ${progress.toStringAsFixed(2)}%');
        return 0;
      });

  for (final s in segments) {
    print(
        '${s.start.toStringAsFixed(3)} -- ${s.end.toStringAsFixed(3)} speaker_${s.speaker}');
  }
}

View File

@@ -0,0 +1,17 @@
name: speaker_diarization
description: >
  This example demonstrates how to use the Dart API for speaker diarization.
version: 1.0.0

environment:
  # Requires a Dart 3 SDK (null safety, records, patterns available).
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.10.27
  # To develop against a local checkout instead of the published package,
  # comment out the line above and uncomment the path override below:
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx

  # `path` is the pub.dev package for path manipulation (used by init.dart).
  path: ^1.9.0

dev_dependencies:
  lints: ^3.0.0

View File

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Downloads the models and the test wave file (if not already present), then
# runs the Dart speaker-diarization example.
#
# NOTE: "speaker-recongition-models" below is the actual spelling of the
# upstream release tag — do not "fix" the typo or the URL breaks.
set -ex

dart pub get

# Speaker segmentation model (pyannote).
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  # -f: fail on HTTP errors instead of saving an error page as the archive,
  # which would otherwise make this guard pass forever with a broken file.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

# Speaker embedding extractor model.
if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

# Test wave file with 4 speakers.
if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dart run ./bin/speaker-diarization.dart