[CPU] Fix phi4-mm prompt issue in bench_serving (#9900)

2025-09-09 11:12:32 +08:00
parent ba066ca02f
commit 97fff98c68
1 changed files with 15 additions and 7 deletions
--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -995,17 +995,25 @@ def sample_mmmu_requests(
                prompt = f"Question: {question}\n\nAnswer: "
                if apply_chat_template:
                    try:
-                        prompt = tokenizer.apply_chat_template(
+                        is_phi4_multimodal = (
-                            [
+                            "phi-4-multimodal" in tokenizer.name_or_path.lower()
-                                {
+                        )
-                                    "role": "user",
+                        if is_phi4_multimodal:
-                                    "content": [
+                            # <|endoftext10|> is the image token used in the phi-4-multimodal model.
                            content = prompt.replace("image 1", "<|endoftext10|>")
                        else:
                            content = [
                                {
                                    "type": "image_url",
                                    "image_url": {"url": image_data},
                                },
                                {"type": "text", "text": prompt},
-                                    ],
+                            ]
                        prompt = tokenizer.apply_chat_template(
                            [
                                {
                                    "role": "user",
                                    "content": content,
                                }
                            ],
                            add_generation_prompt=True,