Replace torchaudio with soundfile in python-api-examples (#765)
This commit is contained in:
@@ -65,7 +65,7 @@ from typing import Dict, List, Tuple
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sherpa_onnx
|
import sherpa_onnx
|
||||||
import torchaudio
|
import soundfile as sf
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
@@ -357,8 +357,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
|
|||||||
|
|
||||||
|
|
||||||
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
||||||
samples, sample_rate = torchaudio.load(filename)
|
data, sample_rate = sf.read(
|
||||||
return samples[0].contiguous().numpy(), sample_rate
|
filename,
|
||||||
|
always_2d=True,
|
||||||
|
dtype="float32",
|
||||||
|
)
|
||||||
|
data = data[:, 0] # use only the first channel
|
||||||
|
samples = np.ascontiguousarray(data)
|
||||||
|
return samples, sample_rate
|
||||||
|
|
||||||
|
|
||||||
def compute_speaker_embedding(
|
def compute_speaker_embedding(
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ from typing import Dict, List, Tuple
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sherpa_onnx
|
import sherpa_onnx
|
||||||
import torchaudio
|
import soundfile as sf
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
@@ -160,8 +160,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
|
|||||||
|
|
||||||
|
|
||||||
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
||||||
samples, sample_rate = torchaudio.load(filename)
|
data, sample_rate = sf.read(
|
||||||
return samples[0].contiguous().numpy(), sample_rate
|
filename,
|
||||||
|
always_2d=True,
|
||||||
|
dtype="float32",
|
||||||
|
)
|
||||||
|
data = data[:, 0] # use only the first channel
|
||||||
|
samples = np.ascontiguousarray(data)
|
||||||
|
return samples, sample_rate
|
||||||
|
|
||||||
|
|
||||||
def compute_speaker_embedding(
|
def compute_speaker_embedding(
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ from typing import Dict, List, Tuple
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sherpa_onnx
|
import sherpa_onnx
|
||||||
import torchaudio
|
import soundfile as sf
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
@@ -145,8 +145,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
|
|||||||
|
|
||||||
|
|
||||||
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
|
||||||
samples, sample_rate = torchaudio.load(filename)
|
data, sample_rate = sf.read(
|
||||||
return samples[0].contiguous().numpy(), sample_rate
|
filename,
|
||||||
|
always_2d=True,
|
||||||
|
dtype="float32",
|
||||||
|
)
|
||||||
|
data = data[:, 0] # use only the first channel
|
||||||
|
samples = np.ascontiguousarray(data)
|
||||||
|
return samples, sample_rate
|
||||||
|
|
||||||
|
|
||||||
def compute_speaker_embedding(
|
def compute_speaker_embedding(
|
||||||
|
|||||||
Reference in New Issue
Block a user