[CPU] Fix CPU backend selection issue for Llama4 (#10511)
This commit is contained in:
@@ -2648,7 +2648,7 @@ class ServerArgs:
|
||||
# use bf16 for mxfp4 triton kernels
|
||||
self.dtype = "bfloat16"
|
||||
|
||||
-elif "Llama4" in model_arch:
|
||||
+elif "Llama4" in model_arch and self.device != "cpu":
|
||||
assert self.attention_backend in {
|
||||
"fa3",
|
||||
"aiter",
|
||||
|
||||
Reference in New Issue
Block a user