Support exporting models to onnx from 3D-Speaker (#522)

This commit is contained in:
Fangjun Kuang
2024-01-10 21:09:45 +08:00
committed by GitHub
parent 55266918c8
commit 07e2b9a36d
10 changed files with 442 additions and 14 deletions

View File

@@ -124,7 +124,7 @@ def main():
# all models from wespeaker expect input samples in the range
# [-32768, 32767]
normalize_features = 0
normalize_samples = 0
meta_data = {
"framework": "wespeaker",
@@ -133,7 +133,7 @@ def main():
"comment": comment,
"sample_rate": sample_rate,
"output_dim": output_dim,
"normalize_features": normalize_features,
"normalize_samples": normalize_samples,
}
print(meta_data)
add_meta_data(filename=str(model), meta_data=meta_data)

View File

@@ -3,7 +3,7 @@
"""
This script computes the speaker similarity score (cosine similarity, in the range [-1, 1])
of two wave files using a speaker recognition model.
of two wave files using a speaker embedding model.
"""
import argparse
import wave
@@ -54,8 +54,6 @@ def read_wavefile(filename, expected_sample_rate: int = 16000) -> np.ndarray:
"""
filename = str(filename)
with wave.open(filename) as f:
# Note: If wave_file_sample_rate is different from
# recognizer.sample_rate, we will do resampling inside sherpa-ncnn
wave_file_sample_rate = f.getframerate()
assert wave_file_sample_rate == expected_sample_rate, (
wave_file_sample_rate,
@@ -104,7 +102,7 @@ class OnnxModel:
):
session_opts = ort.SessionOptions()
session_opts.inter_op_num_threads = 1
session_opts.intra_op_num_threads = 4
session_opts.intra_op_num_threads = 1
self.session_opts = session_opts
@@ -114,7 +112,7 @@ class OnnxModel:
)
meta = self.model.get_modelmeta().custom_metadata_map
self.normalize_features = int(meta["normalize_features"])
self.normalize_samples = int(meta["normalize_samples"])
self.sample_rate = int(meta["sample_rate"])
self.output_dim = int(meta["output_dim"])
@@ -151,7 +149,7 @@ def main():
wave1 = read_wavefile(file1, model.sample_rate)
wave2 = read_wavefile(file2, model.sample_rate)
if not model.normalize_features:
if not model.normalize_samples:
wave1 = wave1 * 32768
wave2 = wave2 * 32768
@@ -161,8 +159,6 @@ def main():
output1 = model(features1)
output2 = model(features2)
print(output1.shape)
print(output2.shape)
similarity = np.dot(output1, output2) / (norm(output1) * norm(output2))
print(f"similarity in the range [0-1]: {similarity}")