JavaScript API (node-addon) for speaker diarization (#1408)
This commit is contained in:
@@ -43,6 +43,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WI
|
||||
|
||||
The following tables list the examples in this folder.
|
||||
|
||||
## Speaker diarization
|
||||
|
||||
|File| Description|
|
||||
|---|---|
|
||||
|[./test_offline_speaker_diarization.js](./test_offline_speaker_diarization.js)| Demonstrates how to use the sherpa-onnx JavaScript API for speaker diarization. It supports speaker segmentation models from [pyannote-audio](https://github.com/pyannote/pyannote-audio).|
|
||||
|
||||
## Add punctuations to text
|
||||
|
||||
|File| Description|
|
||||
@@ -130,6 +136,21 @@ The following tables list the examples in this folder.
|
||||
|[./test_tts_non_streaming_vits_zh_aishell3.js](./test_tts_non_streaming_vits_zh_aishell3.js)| Text-to-speech with a Chinese TTS model|
|
||||
|
||||
|
||||
### Speaker diarization
|
||||
|
||||
```bash
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
|
||||
|
||||
node ./test_offline_speaker_diarization.js
|
||||
```
|
||||
|
||||
### Voice Activity detection (VAD)
|
||||
|
||||
```bash
|
||||
|
||||
62
nodejs-addon-examples/test_offline_speaker_diarization.js
Normal file
62
nodejs-addon-examples/test_offline_speaker_diarization.js
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
const sherpa_onnx = require('sherpa-onnx-node');
|
||||
|
||||
// clang-format off
|
||||
/* Please use the following commands to download files
|
||||
used in this script
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
|
||||
|
||||
*/
|
||||
// clang-format on
|
||||
|
||||
// Configuration object passed to sherpa_onnx.OfflineSpeakerDiarization.
// The model paths are relative to the current working directory and match
// the files fetched by the download commands listed at the top of this file.
const config = {
  segmentation: {
    pyannote: {
      model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx',
    },
  },
  embedding: {
    model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
  },
  clustering: {
    // The test wave file ./0-four-speakers-zh.wav is known to contain
    // 4 speakers, so numClusters is set to 4 here. If the number of speakers
    // is not known in advance, set numClusters to -1.
    numClusters: 4,

    // If numClusters is not -1, then threshold is ignored.
    //
    // A larger threshold leads to fewer clusters, i.e., fewer speakers.
    // A smaller threshold leads to more clusters, i.e., more speakers.
    // You need to tune it yourself.
    threshold: 0.5,
  },

  // If a segment is shorter than minDurationOn, it is discarded.
  minDurationOn: 0.2,  // in seconds

  // If the gap between two segments is less than minDurationOff, the two
  // segments are merged into a single one.
  minDurationOff: 0.5,  // in seconds
};
|
||||
|
||||
// Input audio to diarize; fetched by the download commands at the top of
// this file.
const waveFilename = './0-four-speakers-zh.wav';

// Construct the diarizer from the configuration defined earlier in this file.
const sd = new sherpa_onnx.OfflineSpeakerDiarization(config);
console.log('Started');

const wave = sherpa_onnx.readWave(waveFilename);

// Refuse to continue when the wave file's sample rate differs from the one
// reported by the diarizer. Strict comparison avoids implicit type coercion.
if (sd.sampleRate !== wave.sampleRate) {
  throw new Error(
      `Expected sample rate: ${sd.sampleRate}, given: ${wave.sampleRate}`);
}

// Run diarization over the samples and print whatever process() returns.
const segments = sd.process(wave.samples);
console.log(segments);
|
||||
Reference in New Issue
Block a user