Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -31,6 +31,7 @@ from tempfile import NamedTemporaryFile
from typing import Any, cast
import numpy as np
from huggingface_hub import snapshot_download
from PIL import Image
from typing_extensions import deprecated
@@ -60,6 +61,8 @@ except ImportError:
logger = logging.getLogger(__name__)
DEFAULT_NUM_PROMPTS = 1000
# -----------------------------------------------------------------------------
# Data Classes
# -----------------------------------------------------------------------------
@@ -303,9 +306,11 @@ def process_image(image: Any) -> Mapping[str, Any]:
a JPEG in memory. - Encodes the JPEG data as a base64 string. - Returns
a dictionary with the image as a base64 data URL.
3. String input: - Treats the string as a URL or local file path. -
Prepends "file://" if the string doesn't start with "http://" or
"file://". - Returns a dictionary with the image URL.
3. String input: - Treats the string as a URL, local file path, or base64
encoded data. - If string starts with "data:image/", treats as base64.
- If string starts with "http://", "https://", or "file://", treats as URL.
- Otherwise treats as local file path and prepends "file://".
- Returns a dictionary with the image URL or base64 data.
Raises:
ValueError: If the input is not a supported type.
@@ -325,14 +330,14 @@ def process_image(image: Any) -> Mapping[str, Any]:
if isinstance(image, str):
image_url = (
image
if image.startswith(("http://", "https://", "file://"))
if image.startswith(("http://", "https://", "file://", "data:image/"))
else f"file://{image}"
)
return {"type": "image_url", "image_url": {"url": image_url}}
raise ValueError(
f"Invalid image input {image}. Must be a PIL.Image.Image"
" or str or dictionary with raw image bytes."
f"Invalid image input {image}. Must be a PIL.Image.Image, "
"str (URL, file path, or base64 data URL), or dictionary with raw image bytes."
)
@@ -1338,7 +1343,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
parser.add_argument(
"--num-prompts",
type=int,
default=1000,
default=DEFAULT_NUM_PROMPTS,
help="Number of prompts to process.",
)
parser.add_argument(
@@ -2676,6 +2681,14 @@ class MMVUDataset(HuggingFaceDataset):
+ (" ".join(f"{k}.{v}" for k, v in x["choices"].items())),
}
# Initialise the dataset and record the two path prefixes that `sample`
# swaps when it rewrites each item's "video" value
# (remote prefix -> local prefix) before calling `process_video`.
#
# Args:
#     **kwargs: forwarded unchanged to the parent class initialiser.
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
# URL prefix built from `self.hf_name`; video references that begin with
# this prefix are the ones `sample` redirects to the local copy.
self._remote_path_root = (
f"https://huggingface.co/datasets/{self.hf_name}/resolve/main"
)
# Stores the return value of `huggingface_hub.snapshot_download` for the
# dataset repo named by `self.hf_name`.
# NOTE(review): presumably this is the local snapshot directory and the call
# may touch the network when the snapshot is not cached yet -- confirm
# against the huggingface_hub documentation. It runs on every construction.
self._local_path_root = snapshot_download(self.hf_name, repo_type="dataset")
def sample(
self,
tokenizer: TokenizerLike,
@@ -2698,7 +2711,9 @@ class MMVUDataset(HuggingFaceDataset):
break
prompt = parser_fn(item)
mm_content = process_video(item["video"])
mm_content = process_video(
item["video"].replace(self._remote_path_root, self._local_path_root)
)
prompt_len = len(tokenizer.encode(prompt))
if enable_multimodal_chat:
# Note: when chat is enabled the request prompt_len is no longer