Fix Llama 4 with MXFP4 dynamic quant on MI35x (#9993)

2025-09-04 00:48:58 -07:00
parent b648d86216
commit 2c562fd2d0
2 changed files with 6 additions and 2 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -2336,7 +2336,8 @@ class ServerArgs:
            assert self.attention_backend in {
                "fa3",
                "aiter",
-            }, "fa3 or aiter is required for Llama4 model"
+                "triton",
+            }, "fa3, aiter, or triton is required for Llama4 model"
        elif model_arch in [
            "Gemma2ForCausalLM",
            "Gemma3ForCausalLM",