[CPU] fix CPU backend selection issue for Llama4 (#10511)

2025-09-16 17:57:45 +08:00
parent 8df7353af3
commit 925dbb3218
2 changed files with 3 additions and 1 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -2648,7 +2648,7 @@ class ServerArgs:
                # use bf16 for mxfp4 triton kernels
                self.dtype = "bfloat16"

-        elif "Llama4" in model_arch:
+        elif "Llama4" in model_arch and self.device != "cpu":
            assert self.attention_backend in {
                "fa3",
                "aiter",