[CI][XPU] Enable SGLang CI on Intel XPU (#9493)

Co-authored-by: huaiyuzh <huaiyu.zheng@intel.com>
Co-authored-by: Ma Mingfei <mingfei.ma@intel.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
DiweiSun
2025-10-16 08:13:19 +08:00
committed by GitHub
parent baf277a9bf
commit 4c03dbaaef
6 changed files with 266 additions and 2 deletions

View File

@@ -17,6 +17,7 @@ from sglang.srt.utils import (
is_cuda,
is_hip,
is_npu,
is_xpu,
)
_is_cuda = is_cuda()
@@ -25,6 +26,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
_is_npu = is_npu()
_is_cpu_amx_available = cpu_has_amx_support()
_is_cpu = is_cpu()
_is_xpu = is_xpu()
if _is_cuda:
from sgl_kernel import FusedSetKVBufferArg, apply_rope_with_cos_sin_cache_inplace
@@ -109,8 +111,10 @@ class RotaryEmbedding(CustomOp):
cache = cache.to(dtype)
if (
not (_is_cuda or _is_npu) or self.head_size not in [64, 128, 256, 512]
) and not (_is_cpu and _is_cpu_amx_available):
(not (_is_cuda or _is_npu) or self.head_size not in [64, 128, 256, 512])
and not (_is_cpu and _is_cpu_amx_available)
and not _is_xpu
):
from vllm._custom_ops import rotary_embedding
self.vllm_rotary_embedding = rotary_embedding
@@ -284,6 +288,16 @@ class RotaryEmbedding(CustomOp):
s += f", base={self.base}, is_neox_style={self.is_neox_style}"
return s
def forward_xpu(
    self,
    positions: torch.Tensor,
    query: torch.Tensor,
    key: torch.Tensor,
    offsets: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Rotary-embedding forward path on Intel XPU devices.

    No fused XPU kernel is wired up yet, so this simply delegates to the
    pure-PyTorch reference implementation ``forward_native`` with the same
    arguments and returns its ``(query, key)`` result unchanged.
    TODO: swap in a dedicated XPU kernel once one is implemented.
    """
    return self.forward_native(positions, query, key, offsets)
class LinearScalingRotaryEmbedding(RotaryEmbedding):
"""RotaryEmbedding extended with linear scaling.

View File

@@ -75,6 +75,11 @@ DEFAULT_MODEL_NAME_FOR_TEST_FP8_WITH_MOE = "gaunernst/DeepSeek-V2-Lite-Chat-FP8"
# W8A8-quantized reference checkpoints (HF repo ids), per the ".w8a8" suffix.
DEFAULT_MODEL_NAME_FOR_TEST_W8A8 = "RedHatAI/Llama-3.2-3B-quantized.w8a8"
DEFAULT_MODEL_NAME_FOR_TEST_W8A8_WITH_MOE = "nytopop/Qwen3-30B-A3B.w8a8"
# INT4 models
# AWQ INT4-quantized Llama-3.1-8B-Instruct used as the INT4 reference model.
DEFAULT_MODEL_NAME_FOR_TEST_AWQ_INT4 = (
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
)
# EAGLE
# Target checkpoint and its matching sglang-EAGLE draft checkpoint
# (presumably paired for speculative-decoding tests — confirm with callers).
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmsys/sglang-EAGLE-llama2-chat-7B"