From 84b30d9e0012e724f320dd4f23bdf2a86fbf2a4c Mon Sep 17 00:00:00 2001
From: zifeitong <zifeitong@gmail.com>
Date: Sun, 17 Aug 2025 16:34:19 -0700
Subject: [PATCH] Set the default attention backend for GLM-4.5v to fa3 (#9245)

---
 python/sglang/srt/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index b31f2a5ec..0318f3bd4 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -2345,6 +2345,7 @@ def is_fa3_default_architecture(hf_config):
         "Qwen3ForCausalLM",
         "Qwen3MoeForCausalLM",
         "Glm4MoeForCausalLM",
+        "Glm4vMoeForConditionalGeneration",
         "Step3VLForConditionalGeneration",
     }
     return architectures[0] in default_archs