bench: Add MMMU benchmark for vLM (#3562)

2025-02-23 00:10:59 +08:00
parent 9087694006
commit 45205d88a0
9 changed files with 1026 additions and 7 deletions
--- a/python/sglang/lang/backend/runtime_endpoint.py
+++ b/python/sglang/lang/backend/runtime_endpoint.py
@@ -336,7 +336,7 @@ class Runtime:
    """
    A wrapper for the HTTP server.
    This is used for launching the server in a python program without
-    using the commond line interface.
+    using the command line interface.

    It is mainly used for the frontend language.
    You should use the Engine class if you want to do normal offline processing without the frontend language.
--- a/python/sglang/srt/managers/image_processor.py
+++ b/python/sglang/srt/managers/image_processor.py
@@ -544,7 +544,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
                image_hashes = [image_hash]
                image_sizes = [image_size]
                image_grid_thws = [image_grid_thw]
-        elif isinstance(image_data, str):
+        elif isinstance(image_data, str) or isinstance(image_data, bytes):
            # A single image
            pixel_values, image_hash, image_size, image_grid_thw = (
                await self._process_single_image(image_data)
@@ -553,6 +553,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
            image_sizes = [image_size]
            image_grid_thws = [image_grid_thw]
        else:
+
            raise ValueError(f"Invalid image data: {image_data}")

        return {