Add minimal vLLM 0.16.1 build repo for BI-V150

This commit is contained in:
2026-04-18 10:56:22 +08:00
commit d69657327e
1895 changed files with 615301 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from .audio import AudioEmbeddingMediaIO, AudioMediaIO
from .base import MediaIO, MediaWithBytes
from .connector import MEDIA_CONNECTOR_REGISTRY, MediaConnector
from .image import ImageEmbeddingMediaIO, ImageMediaIO
from .video import VIDEO_LOADER_REGISTRY, VideoMediaIO
# Public re-exports of the media I/O package: base abstractions first,
# then per-modality IO helpers, then the loader/connector registries.
__all__ = [
    "MediaIO",
    "MediaWithBytes",
    "AudioEmbeddingMediaIO",
    "AudioMediaIO",
    "ImageEmbeddingMediaIO",
    "ImageMediaIO",
    "VIDEO_LOADER_REGISTRY",
    "VideoMediaIO",
    "MEDIA_CONNECTOR_REGISTRY",
    "MediaConnector",
]

View File

@@ -0,0 +1,89 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
from io import BytesIO
from pathlib import Path
import numpy.typing as npt
import pybase64
import torch
from vllm.utils.import_utils import PlaceholderModule
from vllm.utils.serial_utils import tensor2base64
from .base import MediaIO
try:
import librosa
except ImportError:
librosa = PlaceholderModule("librosa") # type: ignore[assignment]
try:
import soundfile
except ImportError:
soundfile = PlaceholderModule("soundfile") # type: ignore[assignment]
class AudioMediaIO(MediaIO[tuple[npt.NDArray, float]]):
    """Load and encode audio as ``(samples, sample_rate)`` tuples.

    Decoding is delegated to ``librosa`` and encoding to ``soundfile``;
    both are optional dependencies replaced by ``PlaceholderModule`` when
    not installed (so errors surface only on first use).
    """

    def __init__(self, **kwargs) -> None:
        super().__init__()
        # `kwargs` contains custom arguments from
        # --media-io-kwargs for this modality.
        # They can be passed to the underlying
        # media loaders (e.g. custom implementations)
        # for flexible control.
        self.kwargs = kwargs

    def load_bytes(self, data: bytes) -> tuple[npt.NDArray, float]:
        """Decode in-memory audio bytes; ``sr=None`` keeps the native rate."""
        return librosa.load(BytesIO(data), sr=None)

    def load_base64(
        self,
        media_type: str,
        data: str,
    ) -> tuple[npt.NDArray, float]:
        """Decode a base64 audio payload (``media_type`` is currently unused)."""
        return self.load_bytes(base64.b64decode(data))

    def load_file(self, filepath: Path) -> tuple[npt.NDArray, float]:
        """Load an audio file from disk at its native sample rate."""
        return librosa.load(filepath, sr=None)

    def encode_base64(
        self,
        media: tuple[npt.NDArray, int],
        *,
        audio_format: str = "WAV",
    ) -> str:
        """Encode ``(samples, sample_rate)`` into base64 ``audio_format`` bytes.

        NOTE(review): the sample rate is annotated ``int`` here but the
        loaders above are typed as returning ``float`` — confirm which is
        intended and unify.
        """
        audio, sr = media
        with BytesIO() as buffer:
            soundfile.write(buffer, audio, sr, format=audio_format)
            data = buffer.getvalue()
        return base64.b64encode(data).decode("utf-8")
class AudioEmbeddingMediaIO(MediaIO[torch.Tensor]):
    """Deserialize audio embeddings stored as serialized torch tensors."""

    def __init__(self) -> None:
        super().__init__()

    def _load_dense(self, source) -> torch.Tensor:
        """Safely deserialize a tensor from ``source`` and densify it."""
        # Enable sparse tensor integrity checks to prevent out-of-bounds
        # writes from maliciously crafted tensors
        with torch.sparse.check_sparse_tensor_invariants():
            loaded = torch.load(source, weights_only=True)
        return loaded.to_dense()

    def load_bytes(self, data: bytes) -> torch.Tensor:
        return self._load_dense(BytesIO(data))

    def load_base64(self, media_type: str, data: str) -> torch.Tensor:
        decoded = pybase64.b64decode(data, validate=True)
        return self.load_bytes(decoded)

    def load_file(self, filepath: Path) -> torch.Tensor:
        return self._load_dense(filepath)

    def encode_base64(self, media: torch.Tensor) -> str:
        return tensor2base64(media)

View File

@@ -0,0 +1,61 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Generic, TypeVar
import numpy as np
_T = TypeVar("_T")


@dataclass
class MediaWithBytes(Generic[_T]):
    """
    Wrapper pairing a decoded media object with its original encoded bytes.

    Keeping the raw bytes alongside the decoded object guarantees the two
    stay in sync, which protects caches from in-place modifications of the
    decoded media. Unknown attribute lookups are forwarded to the wrapped
    object, so the wrapper behaves transparently like the underlying type
    (e.g. ``PIL.Image``).

    NOTE: Currently, this wrapper is used only for the image modality.
    """

    media: _T
    # Excluded from repr: raw payloads can be large and are not readable.
    original_bytes: bytes = field(repr=False)

    def __array__(self, *args, **kwargs) -> np.ndarray:
        """Make ``np.array(obj)`` equivalent to ``np.array(obj.media)``."""
        return np.array(self.media, *args, **kwargs)

    def __getstate__(self):
        # A plain dict snapshot keeps pickling independent of __getattr__.
        return dict(self.__dict__)

    def __setstate__(self, state: dict[str, Any]):
        self.__dict__.update(state)

    def __getattr__(self, name: str):
        """Forward attribute access to the wrapped media object."""
        return getattr(self.media, name)
class MediaIO(ABC, Generic[_T]):
    """Abstract loader interface for one media modality.

    Subclasses decode raw bytes, base64 payloads, or local files into the
    modality-specific type ``_T`` (e.g. a PIL image or an audio tuple).
    """

    @abstractmethod
    def load_bytes(self, data: bytes) -> _T:
        """Decode media from its raw encoded bytes."""
        raise NotImplementedError

    @abstractmethod
    def load_base64(self, media_type: str, data: str) -> _T:
        """
        Decode media from a base64-encoded string.

        List of media types:
        https://www.iana.org/assignments/media-types/media-types.xhtml
        """
        raise NotImplementedError

    @abstractmethod
    def load_file(self, filepath: Path) -> _T:
        """Decode media from a file on the local filesystem."""
        raise NotImplementedError

View File

@@ -0,0 +1,343 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import atexit
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any, TypeVar
from urllib.request import url2pathname
import numpy as np
import numpy.typing as npt
import torch
from PIL import Image, UnidentifiedImageError
from urllib3.util import Url, parse_url
import vllm.envs as envs
from vllm.connections import HTTPConnection, global_http_connection
from vllm.utils.registry import ExtensionManager
from .audio import AudioEmbeddingMediaIO, AudioMediaIO
from .base import MediaIO
from .image import ImageEmbeddingMediaIO, ImageMediaIO
from .video import VideoMediaIO
# Type of the decoded media object produced by a MediaIO instance.
_M = TypeVar("_M")

# Shared pool used to offload blocking media decoding from the asyncio
# event loop; sized by VLLM_MEDIA_LOADING_THREAD_COUNT.
global_thread_pool = ThreadPoolExecutor(
    max_workers=envs.VLLM_MEDIA_LOADING_THREAD_COUNT
)
# Join worker threads cleanly at interpreter shutdown.
atexit.register(global_thread_pool.shutdown)

# Extension point for alternative MediaConnector implementations; the
# default connector registers itself under the "http" key.
MEDIA_CONNECTOR_REGISTRY = ExtensionManager()
@MEDIA_CONNECTOR_REGISTRY.register("http")
class MediaConnector:
    """Fetch and decode multi-modal media referenced by URL.

    Supports three URL schemes: HTTP(S) (downloaded through the configured
    connection), ``data:`` (base64 payloads only), and ``file:`` (restricted
    to ``--allowed-local-media-path``). Registered as the default "http"
    connector in ``MEDIA_CONNECTOR_REGISTRY``.
    """

    def __init__(
        self,
        media_io_kwargs: dict[str, dict[str, Any]] | None = None,
        connection: HTTPConnection = global_http_connection,
        *,
        allowed_local_media_path: str = "",
        allowed_media_domains: list[str] | None = None,
    ) -> None:
        """
        Args:
            media_io_kwargs: Additional args passed to process media
                inputs, keyed by modalities. For example,
                to set num_frames for video, set
                `--media-io-kwargs '{"video":{"num_frames":40}}'`
            connection: HTTP connection client to download media contents.
            allowed_local_media_path: A local directory to load media files
                from. Empty (the default) disables ``file:`` URLs entirely.
            allowed_media_domains: If set, only media URLs that belong to this
                domain can be used for multi-modal inputs.
        """
        super().__init__()
        self.media_io_kwargs: dict[str, dict[str, Any]] = (
            media_io_kwargs if media_io_kwargs else {}
        )
        self.connection = connection
        if allowed_local_media_path:
            allowed_local_media_path_ = Path(allowed_local_media_path)
            # Validate eagerly so misconfiguration is reported at startup
            # rather than on the first file:// request.
            if not allowed_local_media_path_.exists():
                raise ValueError(
                    "Invalid `--allowed-local-media-path`: The path "
                    f"{allowed_local_media_path_} does not exist."
                )
            if not allowed_local_media_path_.is_dir():
                raise ValueError(
                    "Invalid `--allowed-local-media-path`: The path "
                    f"{allowed_local_media_path_} must be a directory."
                )
        else:
            # None signals "local file loading disabled" to _load_file_url.
            allowed_local_media_path_ = None
        self.allowed_local_media_path = allowed_local_media_path_
        if allowed_media_domains is None:
            allowed_media_domains = []
        self.allowed_media_domains = allowed_media_domains

    def _load_data_url(
        self,
        url_spec: Url,
        media_io: MediaIO[_M],
    ) -> _M:  # type: ignore[type-var]
        """Decode a ``data:<media_type>;base64,<payload>`` URL via ``media_io``."""
        url_spec_path = url_spec.path or ""
        data_spec, data = url_spec_path.split(",", 1)
        media_type, data_type = data_spec.split(";", 1)
        # media_type starts with a leading "/" (e.g., "/video/jpeg")
        media_type = media_type.lstrip("/")
        if data_type != "base64":
            msg = "Only base64 data URLs are supported for now."
            raise NotImplementedError(msg)
        return media_io.load_base64(media_type, data)

    def _load_file_url(
        self,
        url_spec: Url,
        media_io: MediaIO[_M],
    ) -> _M:  # type: ignore[type-var]
        """Load a ``file:`` URL, restricted to the allowed local directory."""
        allowed_local_media_path = self.allowed_local_media_path
        if allowed_local_media_path is None:
            raise RuntimeError(
                "Cannot load local files without `--allowed-local-media-path`."
            )
        url_spec_path = url_spec.path or ""
        url_spec_netloc = url_spec.netloc or ""
        filepath = Path(url2pathname(url_spec_netloc + url_spec_path))
        # Containment check on the resolved path guards against path
        # traversal ("..") and symlinks escaping the allowed root.
        # NOTE(review): the allowed root itself is not resolve()d; if it
        # contains symlinks this check may reject valid paths — confirm.
        if allowed_local_media_path not in filepath.resolve().parents:
            raise ValueError(
                f"The file path {filepath} must be a subpath "
                f"of `--allowed-local-media-path {allowed_local_media_path}`."
            )
        return media_io.load_file(filepath)

    def _assert_url_in_allowed_media_domains(self, url_spec: Url) -> None:
        """Raise ``ValueError`` if a domain allowlist is set and not matched."""
        if (
            self.allowed_media_domains
            and url_spec.hostname not in self.allowed_media_domains
        ):
            raise ValueError(
                f"The URL must be from one of the allowed domains: "
                f"{self.allowed_media_domains}. Input URL domain: "
                f"{url_spec.hostname}"
            )

    def load_from_url(
        self,
        url: str,
        media_io: MediaIO[_M],
        *,
        fetch_timeout: int | None = None,
    ) -> _M:  # type: ignore[type-var]
        """Synchronously resolve ``url`` (HTTP(S)/data/file) via ``media_io``.

        Raises:
            ValueError: for disallowed domains or unsupported URL schemes.
        """
        url_spec = parse_url(url)
        # startswith("http") covers both "http" and "https".
        if url_spec.scheme and url_spec.scheme.startswith("http"):
            self._assert_url_in_allowed_media_domains(url_spec)
            connection = self.connection
            data = connection.get_bytes(
                url_spec.url,
                timeout=fetch_timeout,
                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
            )
            return media_io.load_bytes(data)
        if url_spec.scheme == "data":
            return self._load_data_url(url_spec, media_io)
        if url_spec.scheme == "file":
            return self._load_file_url(url_spec, media_io)
        msg = "The URL must be either a HTTP, data or file URL."
        raise ValueError(msg)

    async def load_from_url_async(
        self,
        url: str,
        media_io: MediaIO[_M],
        *,
        fetch_timeout: int | None = None,
    ) -> _M:
        """Async variant of ``load_from_url``.

        HTTP downloads use the async connection API; CPU-bound decoding is
        offloaded to ``global_thread_pool`` so the event loop stays free.
        """
        url_spec = parse_url(url)
        loop = asyncio.get_running_loop()
        if url_spec.scheme and url_spec.scheme.startswith("http"):
            self._assert_url_in_allowed_media_domains(url_spec)
            connection = self.connection
            data = await connection.async_get_bytes(
                url_spec.url,
                timeout=fetch_timeout,
                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
            )
            future = loop.run_in_executor(global_thread_pool, media_io.load_bytes, data)
            return await future
        if url_spec.scheme == "data":
            future = loop.run_in_executor(
                global_thread_pool, self._load_data_url, url_spec, media_io
            )
            return await future
        if url_spec.scheme == "file":
            future = loop.run_in_executor(
                global_thread_pool, self._load_file_url, url_spec, media_io
            )
            return await future
        msg = "The URL must be either a HTTP, data or file URL."
        raise ValueError(msg)

    def fetch_audio(
        self,
        audio_url: str,
    ) -> tuple[np.ndarray, int | float]:
        """
        Load audio from a URL as a ``(samples, sample_rate)`` tuple.
        """
        audio_io = AudioMediaIO(**self.media_io_kwargs.get("audio", {}))
        return self.load_from_url(
            audio_url,
            audio_io,
            fetch_timeout=envs.VLLM_AUDIO_FETCH_TIMEOUT,
        )

    async def fetch_audio_async(
        self,
        audio_url: str,
    ) -> tuple[np.ndarray, int | float]:
        """
        Asynchronously fetch audio from a URL.
        """
        audio_io = AudioMediaIO(**self.media_io_kwargs.get("audio", {}))
        return await self.load_from_url_async(
            audio_url,
            audio_io,
            fetch_timeout=envs.VLLM_AUDIO_FETCH_TIMEOUT,
        )

    def fetch_image(
        self,
        image_url: str,
        *,
        image_mode: str = "RGB",
    ) -> Image.Image:
        """
        Load a PIL image from an HTTP or base64 data URL.
        By default, the image is converted into RGB format.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )
        try:
            return self.load_from_url(
                image_url,
                image_io,
                fetch_timeout=envs.VLLM_IMAGE_FETCH_TIMEOUT,
            )
        except UnidentifiedImageError as e:
            # convert to ValueError to be properly caught upstream
            raise ValueError(str(e)) from e

    async def fetch_image_async(
        self,
        image_url: str,
        *,
        image_mode: str = "RGB",
    ) -> Image.Image:
        """
        Asynchronously load a PIL image from an HTTP or base64 data URL.
        By default, the image is converted into RGB format.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )
        try:
            return await self.load_from_url_async(
                image_url,
                image_io,
                fetch_timeout=envs.VLLM_IMAGE_FETCH_TIMEOUT,
            )
        except UnidentifiedImageError as e:
            # convert to ValueError to be properly caught upstream
            raise ValueError(str(e)) from e

    def fetch_video(
        self,
        video_url: str,
        *,
        image_mode: str = "RGB",
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
        Load video from an HTTP or base64 data URL.
        By default, each frame is converted into RGB format.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )
        video_io = VideoMediaIO(image_io, **self.media_io_kwargs.get("video", {}))
        return self.load_from_url(
            video_url,
            video_io,
            fetch_timeout=envs.VLLM_VIDEO_FETCH_TIMEOUT,
        )

    async def fetch_video_async(
        self,
        video_url: str,
        *,
        image_mode: str = "RGB",
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
        Asynchronously load video from an HTTP or base64 data URL.
        By default, the image is converted into RGB format.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )
        video_io = VideoMediaIO(image_io, **self.media_io_kwargs.get("video", {}))
        return await self.load_from_url_async(
            video_url,
            video_io,
            fetch_timeout=envs.VLLM_VIDEO_FETCH_TIMEOUT,
        )

    def fetch_image_embedding(
        self,
        data: str,
    ) -> torch.Tensor:
        """
        Load an image embedding from a base64-encoded tensor payload.
        """
        image_embedding_io = ImageEmbeddingMediaIO()
        return image_embedding_io.load_base64("", data)

    def fetch_audio_embedding(
        self,
        data: str,
    ) -> torch.Tensor:
        """
        Load an audio embedding from a base64-encoded tensor payload.
        """
        audio_embedding_io = AudioEmbeddingMediaIO()
        return audio_embedding_io.load_base64("", data)

View File

@@ -0,0 +1,113 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from io import BytesIO
from pathlib import Path
import pybase64
import torch
from PIL import Image
from vllm.utils.serial_utils import tensor2base64
from ..image import convert_image_mode, rgba_to_rgb
from .base import MediaIO, MediaWithBytes
class ImageMediaIO(MediaIO[Image.Image]):
    """Load and encode still images via PIL, preserving the raw bytes."""

    def __init__(self, image_mode: str = "RGB", **kwargs) -> None:
        super().__init__()
        self.image_mode = image_mode
        # `kwargs` carries custom per-modality arguments coming from
        # --media-io-kwargs; they are forwarded to underlying media
        # loaders (e.g. custom implementations) for flexible control.
        self.kwargs = kwargs
        # Background used when flattening RGBA onto RGB; defaults to white
        # for backward compatibility. A list (from JSON kwargs) is
        # normalized to a tuple before validation.
        background = kwargs.get("rgba_background_color", (255, 255, 255))
        if isinstance(background, list):
            background = tuple(background)
        is_valid = (
            isinstance(background, tuple)
            and len(background) == 3
            and all(isinstance(c, int) and 0 <= c <= 255 for c in background)
        )
        if not is_valid:
            raise ValueError(
                "rgba_background_color must be a list or tuple of 3 integers "
                "in the range [0, 255]."
            )
        self.rgba_background_color = background

    def _convert_image_mode(
        self, image: Image.Image | MediaWithBytes[Image.Image]
    ) -> Image.Image:
        """Convert to ``self.image_mode``, honoring the RGBA background."""
        if isinstance(image, MediaWithBytes):
            image = image.media
        if image.mode == self.image_mode:
            return image
        if image.mode == "RGBA" and self.image_mode == "RGB":
            return rgba_to_rgb(image, self.rgba_background_color)
        return convert_image_mode(image, self.image_mode)

    def load_bytes(self, data: bytes) -> MediaWithBytes[Image.Image]:
        decoded = Image.open(BytesIO(data))
        return MediaWithBytes(self._convert_image_mode(decoded), data)

    def load_base64(self, media_type: str, data: str) -> MediaWithBytes[Image.Image]:
        return self.load_bytes(pybase64.b64decode(data, validate=True))

    def load_file(self, filepath: Path) -> MediaWithBytes[Image.Image]:
        raw = filepath.read_bytes()
        decoded = Image.open(BytesIO(raw))
        return MediaWithBytes(self._convert_image_mode(decoded), raw)

    def encode_base64(
        self,
        media: Image.Image,
        *,
        image_format: str = "PNG",
    ) -> str:
        """Serialize ``media`` in ``image_format`` and base64-encode it."""
        converted = self._convert_image_mode(media)
        with BytesIO() as buffer:
            converted.save(buffer, image_format)
            payload = buffer.getvalue()
        return pybase64.b64encode(payload).decode("utf-8")
class ImageEmbeddingMediaIO(MediaIO[torch.Tensor]):
    """Deserialize image embeddings stored as serialized torch tensors."""

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def _safe_load(source) -> torch.Tensor:
        """Deserialize a tensor from ``source`` with integrity checks."""
        # Enable sparse tensor integrity checks to prevent out-of-bounds
        # writes from maliciously crafted tensors
        with torch.sparse.check_sparse_tensor_invariants():
            result = torch.load(source, weights_only=True)
        return result.to_dense()

    def load_bytes(self, data: bytes) -> torch.Tensor:
        return self._safe_load(BytesIO(data))

    def load_base64(self, media_type: str, data: str) -> torch.Tensor:
        return self.load_bytes(pybase64.b64decode(data, validate=True))

    def load_file(self, filepath: Path) -> torch.Tensor:
        return self._safe_load(filepath)

    def encode_base64(self, media: torch.Tensor) -> str:
        return tensor2base64(media)

View File

@@ -0,0 +1,89 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
from functools import partial
from pathlib import Path
from typing import Any
import numpy as np
import numpy.typing as npt
from PIL import Image
from vllm import envs
from ..video import VIDEO_LOADER_REGISTRY
from .base import MediaIO
from .image import ImageMediaIO
class VideoMediaIO(MediaIO[tuple[npt.NDArray, dict[str, Any]]]):
    """Load videos as ``(frames, metadata)`` via a pluggable loader backend."""

    def __init__(
        self,
        image_io: ImageMediaIO,
        num_frames: int = 32,
        **kwargs,
    ) -> None:
        super().__init__()
        self.image_io = image_io
        self.num_frames = num_frames
        # `kwargs` carries custom arguments from --media-io-kwargs for this
        # modality; they are forwarded to the underlying media loaders
        # (e.g. custom implementations) for flexible control.
        #
        # "video_backend" allows a per-request override of the loader
        # backend, taking precedence over the global
        # VLLM_VIDEO_LOADER_BACKEND env var, e.g.:
        #   --media-io-kwargs '{"video": {"video_backend": "torchcodec"}}'
        backend = kwargs.pop("video_backend", None) or envs.VLLM_VIDEO_LOADER_BACKEND
        self.kwargs = kwargs
        self.video_loader = VIDEO_LOADER_REGISTRY.load(backend)

    def load_bytes(self, data: bytes) -> tuple[npt.NDArray, dict[str, Any]]:
        return self.video_loader.load_bytes(
            data, num_frames=self.num_frames, **self.kwargs
        )

    def load_base64(
        self, media_type: str, data: str
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        # "video/jpeg" is handled as a comma-separated sequence of
        # base64-encoded JPEG frames rather than a real video container.
        if media_type.lower() != "video/jpeg":
            return self.load_bytes(base64.b64decode(data))
        frames = [
            np.asarray(self.image_io.load_base64("image/jpeg", chunk))
            for chunk in data.split(",")
        ]
        return np.stack(frames), {}

    def load_file(self, filepath: Path) -> tuple[npt.NDArray, dict[str, Any]]:
        return self.load_bytes(filepath.read_bytes())

    def encode_base64(
        self,
        media: npt.NDArray,
        *,
        video_format: str = "JPEG",
    ) -> str:
        """Encode frames as a comma-separated list of base64 JPEG images."""
        if video_format != "JPEG":
            raise NotImplementedError("Only JPEG format is supported for now.")
        encoded_frames = (
            self.image_io.encode_base64(Image.fromarray(frame), image_format=video_format)
            for frame in media
        )
        return ",".join(encoded_frames)