From 84b30d9e0012e724f320dd4f23bdf2a86fbf2a4c Mon Sep 17 00:00:00 2001 From: zifeitong Date: Sun, 17 Aug 2025 16:34:19 -0700 Subject: [PATCH] Set the default attention backend for GLM-4.5v to fa3 (#9245) --- python/sglang/srt/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index b31f2a5ec..0318f3bd4 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -2345,6 +2345,7 @@ def is_fa3_default_architecture(hf_config): "Qwen3ForCausalLM", "Qwen3MoeForCausalLM", "Glm4MoeForCausalLM", + "Glm4vMoeForConditionalGeneration", "Step3VLForConditionalGeneration", } return architectures[0] in default_archs