diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index f05658052..27ff8a6da 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -995,17 +995,25 @@ def sample_mmmu_requests( prompt = f"Question: {question}\n\nAnswer: " if apply_chat_template: try: + is_phi4_multimodal = ( + "phi-4-multimodal" in tokenizer.name_or_path.lower() + ) + if is_phi4_multimodal: + # <|endoftext10|> is the image token used in the phi-4-multimodal model. + content = prompt.replace("image 1", "<|endoftext10|>") + else: + content = [ + { + "type": "image_url", + "image_url": {"url": image_data}, + }, + {"type": "text", "text": prompt}, + ] prompt = tokenizer.apply_chat_template( [ { "role": "user", - "content": [ - { - "type": "image_url", - "image_url": {"url": image_data}, - }, - {"type": "text", "text": prompt}, - ], + "content": content, } ], add_generation_prompt=True,