Replace torchaudio with soundfile in python-api-examples (#765)
This commit is contained in:
@@ -65,7 +65,7 @@ from typing import Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import sherpa_onnx
|
||||
import torchaudio
|
||||
import soundfile as sf
|
||||
|
||||
try:
|
||||
import sounddevice as sd
|
||||
@@ -357,8 +357,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
|
||||
|
||||
|
||||
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
||||
samples, sample_rate = torchaudio.load(filename)
|
||||
return samples[0].contiguous().numpy(), sample_rate
|
||||
data, sample_rate = sf.read(
|
||||
filename,
|
||||
always_2d=True,
|
||||
dtype="float32",
|
||||
)
|
||||
data = data[:, 0] # use only the first channel
|
||||
samples = np.ascontiguousarray(data)
|
||||
return samples, sample_rate
|
||||
|
||||
|
||||
def compute_speaker_embedding(
|
||||
|
||||
@@ -60,7 +60,7 @@ from typing import Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import sherpa_onnx
|
||||
import torchaudio
|
||||
import soundfile as sf
|
||||
|
||||
try:
|
||||
import sounddevice as sd
|
||||
@@ -160,8 +160,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
|
||||
|
||||
|
||||
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
||||
samples, sample_rate = torchaudio.load(filename)
|
||||
return samples[0].contiguous().numpy(), sample_rate
|
||||
data, sample_rate = sf.read(
|
||||
filename,
|
||||
always_2d=True,
|
||||
dtype="float32",
|
||||
)
|
||||
data = data[:, 0] # use only the first channel
|
||||
samples = np.ascontiguousarray(data)
|
||||
return samples, sample_rate
|
||||
|
||||
|
||||
def compute_speaker_embedding(
|
||||
|
||||
@@ -52,7 +52,7 @@ from typing import Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import sherpa_onnx
|
||||
import torchaudio
|
||||
import soundfile as sf
|
||||
|
||||
try:
|
||||
import sounddevice as sd
|
||||
@@ -145,8 +145,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
|
||||
|
||||
|
||||
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
||||
samples, sample_rate = torchaudio.load(filename)
|
||||
return samples[0].contiguous().numpy(), sample_rate
|
||||
data, sample_rate = sf.read(
|
||||
filename,
|
||||
always_2d=True,
|
||||
dtype="float32",
|
||||
)
|
||||
data = data[:, 0] # use only the first channel
|
||||
samples = np.ascontiguousarray(data)
|
||||
return samples, sample_rate
|
||||
|
||||
|
||||
def compute_speaker_embedding(
|
||||
|
||||
Reference in New Issue
Block a user