Feat: Support audio in Phi4-mm model (#8048)
This commit is contained in:
@@ -691,12 +691,17 @@ def decode_video_base64(video_base64):
|
||||
) # Return an empty array and size tuple if no frames were found
|
||||
|
||||
|
||||
def load_audio(audio_file: str, sr: int = 16000, mono: bool = True) -> np.ndarray:
|
||||
def load_audio(
|
||||
audio_file: str, sr: Optional[int] = None, mono: bool = True
|
||||
) -> np.ndarray:
|
||||
# Use soundfile here, since librosa uses it under the hood,
|
||||
# and librosa will not support audio loading in the future
|
||||
import soundfile as sf
|
||||
from scipy.signal import resample
|
||||
|
||||
if sr is None:
|
||||
sr = 16000
|
||||
|
||||
# Load audio data
|
||||
if isinstance(audio_file, bytes):
|
||||
audio, original_sr = sf.read(BytesIO(audio_file))
|
||||
|
||||
Reference in New Issue
Block a user