Set the default attention backend for GLM-4.5v to fa3 (#9245)

2025-08-17 16:34:19 -07:00
parent ff0cf51c8e
commit 84b30d9e00
1 changed file with 1 addition and 0 deletions
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -2345,6 +2345,7 @@ def is_fa3_default_architecture(hf_config):
        "Qwen3ForCausalLM",
        "Qwen3MoeForCausalLM",
        "Glm4MoeForCausalLM",
        "Glm4vMoeForConditionalGeneration",
        "Step3VLForConditionalGeneration",
    }
    return architectures[0] in default_archs