From 0cd1996eae2499a200036f4ab1d12bf7611eca21 Mon Sep 17 00:00:00 2001 From: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com> Date: Sun, 5 Oct 2025 21:13:17 -0700 Subject: [PATCH] feat: add shortcut detection for multimodal templates in Jinja format (#11209) --- python/sglang/srt/parser/jinja_template_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/sglang/srt/parser/jinja_template_utils.py b/python/sglang/srt/parser/jinja_template_utils.py index be7d44097..088c3eb91 100644 --- a/python/sglang/srt/parser/jinja_template_utils.py +++ b/python/sglang/srt/parser/jinja_template_utils.py @@ -89,6 +89,12 @@ def detect_jinja_template_content_format(chat_template: str) -> str: - If template has loops like {%- for content in message['content'] -%} → 'openai' - Otherwise → 'string' """ + # Shortcut for multimodal templates + if any( + keyword in chat_template for keyword in ["image", "audio", "video", "vision"] + ): + return "openai" + jinja_ast = _try_extract_ast(chat_template) if jinja_ast is None: return "string"