[CPU] Fix CPU backend selection issue for Llama4 (#10511)
This commit is contained in:
@@ -81,6 +81,8 @@ git clone https://github.com/sgl-project/sglang.git
|
|||||||
cd sglang
|
cd sglang
|
||||||
git checkout <YOUR-DESIRED-VERSION>
|
git checkout <YOUR-DESIRED-VERSION>
|
||||||
|
|
||||||
|
# Use dedicated toml file
|
||||||
|
cp python/pyproject_other.toml python/pyproject.toml
|
||||||
# Install SGLang dependent libs, and build SGLang main package
|
# Install SGLang dependent libs, and build SGLang main package
|
||||||
pip install --upgrade pip setuptools
|
pip install --upgrade pip setuptools
|
||||||
conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl
|
conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl
|
||||||
|
|||||||
@@ -2648,7 +2648,7 @@ class ServerArgs:
|
|||||||
# use bf16 for mxfp4 triton kernels
|
# use bf16 for mxfp4 triton kernels
|
||||||
self.dtype = "bfloat16"
|
self.dtype = "bfloat16"
|
||||||
|
|
||||||
elif "Llama4" in model_arch:
|
elif "Llama4" in model_arch and self.device != "cpu":
|
||||||
assert self.attention_backend in {
|
assert self.attention_backend in {
|
||||||
"fa3",
|
"fa3",
|
||||||
"aiter",
|
"aiter",
|
||||||
|
|||||||
Reference in New Issue
Block a user