Export Pyannote speaker segmentation models to onnx (#1382)

This commit is contained in:
Fangjun Kuang
2024-09-29 14:23:56 +08:00
committed by GitHub
parent 11f0cb7e1c
commit bc08160820
9 changed files with 707 additions and 0 deletions

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
import torch
from pyannote.audio import Model
from pyannote.audio.pipelines import (
VoiceActivityDetection as VoiceActivityDetectionPipeline,
)
@torch.no_grad()
def main():
    """Run the pyannote voice activity detection pipeline on a test wave.

    Loads the segmentation checkpoint from ``./pytorch_model.bin``, wraps it
    in a ``VoiceActivityDetectionPipeline``, runs the pipeline on
    ``./lei-jun-test.wav`` and prints the type of the result followed by the
    result itself. Both files are expected in the current working directory.
    """
    # The checkpoint can be downloaded from
    # https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0
    segmentation_model = Model.from_pretrained("./pytorch_model.bin")
    segmentation_model.eval()

    vad = VoiceActivityDetectionPipeline(segmentation=segmentation_model)

    # onset/offset are assigned as plain attributes before instantiate(),
    # so only the two duration hyper-parameters are passed to it below.
    vad.onset = 0.5
    vad.offset = 0.5

    # Hyper-parameter references:
    # https://huggingface.co/pyannote/voice-activity-detection/blob/main/config.yaml
    # https://github.com/pyannote/pyannote-audio/issues/1215
    vad.instantiate({"min_duration_on": 0.0, "min_duration_off": 0.0})

    # The test wave can be fetched with
    # wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
    result = vad("./lei-jun-test.wav")

    print(type(result))
    print(result)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()