Feat: Support audio in Phi4-mm model (#8048)

This commit is contained in:
Binyao Jiang
2025-07-18 21:03:53 -07:00
committed by GitHub
parent d918ab7985
commit b7e951a6db
11 changed files with 3333 additions and 54 deletions

View File

@@ -691,12 +691,17 @@ def decode_video_base64(video_base64):
) # Return an empty array and size tuple if no frames were found
def load_audio(audio_file: str, sr: int = 16000, mono: bool = True) -> np.ndarray:
def load_audio(
audio_file: str, sr: Optional[int] = None, mono: bool = True
) -> np.ndarray:
# Use soundfile here, since librosa uses it under the hood,
# and librosa will not support audio loading in the future.
import soundfile as sf
from scipy.signal import resample
if sr is None:
sr = 16000
# Load audio data
if isinstance(audio_file, bytes):
audio, original_sr = sf.read(BytesIO(audio_file))