39 lines
1.0 KiB
Python
Executable File
39 lines
1.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import torch
|
|
from pyannote.audio import Model
|
|
from pyannote.audio.pipelines import (
|
|
VoiceActivityDetection as VoiceActivityDetectionPipeline,
|
|
)
|
|
|
|
|
|
@torch.no_grad()
def main():
    """Run the pyannote voice-activity-detection pipeline on a test wave.

    Loads the segmentation checkpoint from ``./pytorch_model.bin``, wraps it
    in a ``VoiceActivityDetectionPipeline``, configures the pipeline, runs it
    on ``./lei-jun-test.wav`` and prints the type of the returned object
    followed by the object itself.
    """
    # The checkpoint can be downloaded from
    # https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0
    segmentation_model = Model.from_pretrained("./pytorch_model.bin")
    segmentation_model.eval()

    vad = VoiceActivityDetectionPipeline(segmentation=segmentation_model)

    # Parameter choices follow
    # https://huggingface.co/pyannote/voice-activity-detection/blob/main/config.yaml
    # https://github.com/pyannote/pyannote-audio/issues/1215
    vad.onset = vad.offset = 0.5
    vad.instantiate({"min_duration_on": 0.0, "min_duration_off": 0.0})

    # The test file can be fetched with
    # wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
    vad_output = vad("./lei-jun-test.wav")
    for item in (type(vad_output), vad_output):
        print(item)
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|