Fix Llama 4 with MXFP4 dynamic quant on MI35x (#9993)

This commit is contained in:
Hubert Lu
2025-09-04 00:48:58 -07:00
committed by GitHub
parent b648d86216
commit 2c562fd2d0
2 changed files with 6 additions and 2 deletions

View File

@@ -2336,7 +2336,8 @@ class ServerArgs:
             assert self.attention_backend in {
                 "fa3",
                 "aiter",
-            }, "fa3 or aiter is required for Llama4 model"
+                "triton",
+            }, "fa3, aiter, or triton is required for Llama4 model"
         elif model_arch in [
             "Gemma2ForCausalLM",
             "Gemma3ForCausalLM",