fix: examples for token_in_token_out_vlm (#5193)

2025-04-11 01:38:23 -07:00
parent 4d2e305149
commit e7beff8a13
2 changed files with 16 additions and 17 deletions
--- a/examples/runtime/token_in_token_out/token_in_token_out_vlm_engine.py
+++ b/examples/runtime/token_in_token_out/token_in_token_out_vlm_engine.py
@@ -1,10 +1,7 @@
 import argparse
 import dataclasses
-from io import BytesIO
 from typing import Tuple

-import requests
-from PIL import Image
 from transformers import AutoProcessor

 from sglang import Engine
@@ -19,20 +16,22 @@ def get_input_ids(
 ) -> Tuple[list[int], list]:
    chat_template = get_chat_template_by_model_path(model_config.model_path)
    text = f"{chat_template.image_token}What is in this picture?"
-    images = [Image.open(BytesIO(requests.get(DEFAULT_IMAGE_URL).content))]
    image_data = [DEFAULT_IMAGE_URL]

    processor = AutoProcessor.from_pretrained(
        model_config.model_path, trust_remote_code=server_args.trust_remote_code
    )

-    inputs = processor(
-        text=[text],
-        images=images,
-        return_tensors="pt",
+    input_ids = (
+        processor.tokenizer(
+            text=[text],
+            return_tensors="pt",
+        )
+        .input_ids[0]
+        .tolist()
    )

-    return inputs.input_ids[0].tolist(), image_data
+    return input_ids, image_data


 def token_in_out_example(
--- a/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
+++ b/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
@@ -5,11 +5,9 @@ python token_in_token_out_vlm_server.py

 """

-from io import BytesIO
 from typing import Tuple

 import requests
-from PIL import Image
 from transformers import AutoProcessor

 from sglang.lang.chat_template import get_chat_template_by_model_path
@@ -28,18 +26,20 @@ MODEL_PATH = "Qwen/Qwen2-VL-2B"
 def get_input_ids() -> Tuple[list[int], list]:
    chat_template = get_chat_template_by_model_path(MODEL_PATH)
    text = f"{chat_template.image_token}What is in this picture?"
-    images = [Image.open(BytesIO(requests.get(DEFAULT_IMAGE_URL).content))]
    image_data = [DEFAULT_IMAGE_URL]

    processor = AutoProcessor.from_pretrained(MODEL_PATH)

-    inputs = processor(
-        text=[text],
-        images=images,
-        return_tensors="pt",
+    input_ids = (
+        processor.tokenizer(
+            text=[text],
+            return_tensors="pt",
+        )
+        .input_ids[0]
+        .tolist()
    )

-    return inputs.input_ids[0].tolist(), image_data
+    return input_ids, image_data


 def main():