diff --git a/benchmark/mmmu/data_utils.py b/benchmark/mmmu/data_utils.py index cf8916934..8c36768d0 100644 --- a/benchmark/mmmu/data_utils.py +++ b/benchmark/mmmu/data_utils.py @@ -75,12 +75,6 @@ CAT_SHORT2LONG = { } -# DATA SAVING -def save_json(filename, ds): - with open(filename, "w") as f: - json.dump(ds, f, indent=4) - - def get_multi_choice_info(options): """ Given the list of options for multiple choice question diff --git a/examples/runtime/multimodal/llava_onevision_server.py b/examples/runtime/multimodal/llava_onevision_server.py index ee921b558..b5636f0a1 100644 --- a/examples/runtime/multimodal/llava_onevision_server.py +++ b/examples/runtime/multimodal/llava_onevision_server.py @@ -6,7 +6,6 @@ python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b- python3 llava_onevision_server.py """ -import base64 import io import os import sys @@ -14,6 +13,7 @@ import time import numpy as np import openai +import pybase64 import requests from decord import VideoReader, cpu from PIL import Image @@ -213,7 +213,7 @@ def prepare_video_messages(video_path): pil_img = Image.fromarray(frame) buff = io.BytesIO() pil_img.save(buff, format="JPEG") - base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8") base64_frames.append(base64_str) messages = [{"role": "user", "content": []}] diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index 21b822f04..5e4e2c8ef 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -31,7 +31,10 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union import aiohttp import numpy as np +import pybase64 import requests +from datasets import load_dataset +from PIL import Image from tqdm.asyncio import tqdm from transformers import ( AutoProcessor, @@ -1020,14 +1023,6 @@ def sample_mmmu_requests( Returns: List of tuples (prompt, prompt_token_len, output_token_len). 
""" - try: - import io - - import pybase64 - from datasets import load_dataset - except ImportError: - raise ImportError("Please install datasets: pip install datasets") - print("Loading MMMU dataset from HuggingFace...") try: @@ -1396,13 +1391,6 @@ def sample_image_requests( - Text lengths follow the 'random' dataset sampling rule. ``prompt_len`` only counts text tokens and excludes image data. """ - try: - import pybase64 - from PIL import Image - except ImportError as e: - raise ImportError( - "Please install Pillow to generate random images: pip install pillow" - ) from e # Parse resolution (supports presets and 'heightxwidth') width, height = parse_image_resolution(image_resolution) diff --git a/python/sglang/srt/distributed/naive_distributed.py b/python/sglang/srt/distributed/naive_distributed.py index b340ff44d..b59380d07 100644 --- a/python/sglang/srt/distributed/naive_distributed.py +++ b/python/sglang/srt/distributed/naive_distributed.py @@ -1,9 +1,9 @@ -import base64 import pickle import time from pathlib import Path from typing import Any, List, Optional +import pybase64 import torch from sglang.srt.utils import MultiprocessingSerializer @@ -77,14 +77,16 @@ class NaiveDistributed: ) _get_path(self._rank).write_text( - base64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix + pybase64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix ) def _read_one(interesting_rank: int): p = _get_path(interesting_rank) while True: if p.exists() and (text := p.read_text()).endswith(text_postfix): - return pickle.loads(base64.b64decode(text[: -len(text_postfix)])) + return pickle.loads( + pybase64.b64decode(text[: -len(text_postfix)], validate=True) + ) time.sleep(0.001) return [ diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index 42fb93374..97415b280 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -872,9 +872,9 @@ def get_image_bytes(image_file: Union[str, bytes]): return 
f.read() elif image_file.startswith("data:"): image_file = image_file.split(",")[1] - return pybase64.b64decode(image_file) + return pybase64.b64decode(image_file, validate=True) elif isinstance(image_file, str): - return pybase64.b64decode(image_file) + return pybase64.b64decode(image_file, validate=True) else: raise NotImplementedError(f"Invalid image: {image_file}") @@ -911,7 +911,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True): vr = VideoReader(tmp_file.name, ctx=ctx) elif video_file.startswith("data:"): _, encoded = video_file.split(",", 1) - video_bytes = pybase64.b64decode(encoded) + video_bytes = pybase64.b64decode(encoded, validate=True) tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") tmp_file.write(video_bytes) tmp_file.close() @@ -919,7 +919,7 @@ elif os.path.isfile(video_file): vr = VideoReader(video_file, ctx=ctx) else: - video_bytes = pybase64.b64decode(video_file) + video_bytes = pybase64.b64decode(video_file, validate=True) tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") tmp_file.write(video_bytes) tmp_file.close() @@ -2083,7 +2083,7 @@ class MultiprocessingSerializer: if output_str: # Convert bytes to base64-encoded string - output = pybase64.b64encode(output).decode("utf-8") + output = pybase64.b64encode(output).decode("utf-8") return output diff --git a/test/srt/test_bnb.py b/test/srt/test_bnb.py index 1d9f0201d..4505b20cf 100644 --- a/test/srt/test_bnb.py +++ b/test/srt/test_bnb.py @@ -4,7 +4,6 @@ python3 -m unittest test_bnb.TestVisionModel.test_vlm python3 -m unittest test_bnb.TestLanguageModel.test_mmlu """ -import base64 import io import json import multiprocessing as mp @@ -15,6 +14,7 @@ from types import SimpleNamespace import numpy as np import openai +import pybase64 import requests from PIL import Image diff --git a/test/srt/test_vision_chunked_prefill.py b/test/srt/test_vision_chunked_prefill.py index 
3876e915b..90fe21330 100644 --- a/test/srt/test_vision_chunked_prefill.py +++ b/test/srt/test_vision_chunked_prefill.py @@ -3,7 +3,6 @@ Usage: python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill """ -import base64 import io import os import unittest @@ -11,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Union import numpy as np +import pybase64 import requests from PIL import Image @@ -45,7 +45,7 @@ class TestVisionChunkedPrefill(CustomTestCase): pil_img = Image.fromarray(frame) buff = io.BytesIO() pil_img.save(buff, format="JPEG") - base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8") base64_frames.append(base64_str) messages = [{"role": "user", "content": []}] diff --git a/test/srt/test_vision_openai_server_common.py b/test/srt/test_vision_openai_server_common.py index 66f0c0d7c..392ccf0f8 100644 --- a/test/srt/test_vision_openai_server_common.py +++ b/test/srt/test_vision_openai_server_common.py @@ -1,10 +1,10 @@ -import base64 import io import os from concurrent.futures import ThreadPoolExecutor import numpy as np import openai +import pybase64 import requests from PIL import Image @@ -386,7 +386,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase): pil_img = Image.fromarray(frame) buff = io.BytesIO() pil_img.save(buff, format="JPEG") - base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8") base64_frames.append(base64_str) messages = [{"role": "user", "content": []}]