Export Pyannote speaker segmentation models to onnx (#1382)
This commit is contained in:
38
scripts/pyannote/segmentation/vad-torch.py
Executable file
38
scripts/pyannote/segmentation/vad-torch.py
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import torch
|
||||
from pyannote.audio import Model
|
||||
from pyannote.audio.pipelines import (
|
||||
VoiceActivityDetection as VoiceActivityDetectionPipeline,
|
||||
)
|
||||
|
||||
|
||||
@torch.no_grad()
def main():
    """Run pyannote voice activity detection on a sample wav and print the result.

    Expects two files in the current working directory:

    - ``./pytorch_model.bin``: the segmentation checkpoint, available from
      https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0
    - ``./lei-jun-test.wav``: the test recording, available via
      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav

    The whole function runs with gradient tracking disabled via the
    ``torch.no_grad()`` decorator.
    """
    segmentation_model = Model.from_pretrained("./pytorch_model.bin")
    segmentation_model.eval()

    vad = VoiceActivityDetectionPipeline(segmentation=segmentation_model)

    # Thresholds are set directly on the pipeline object; only the minimum
    # durations are passed through instantiate(). For background see
    # https://huggingface.co/pyannote/voice-activity-detection/blob/main/config.yaml
    # https://github.com/pyannote/pyannote-audio/issues/1215
    vad.onset = 0.5
    vad.offset = 0.5
    vad.instantiate(
        {
            "min_duration_on": 0.0,
            "min_duration_off": 0.0,
        }
    )

    detection = vad("./lei-jun-test.wav")

    # Show the concrete return type first, then the detection output itself.
    for item in (type(detection), detection):
        print(item)
|
||||
|
||||
|
||||
# Only run the demo when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user