[CI][XPU] Enable sglang CI on Intel XPU (#9493)

Co-authored-by: huaiyuzh <huaiyu.zheng@intel.com>
Co-authored-by: Ma Mingfei <mingfei.ma@intel.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
DiweiSun
2025-10-16 08:13:19 +08:00
committed by GitHub
parent baf277a9bf
commit 4c03dbaaef
6 changed files with 266 additions and 2 deletions

View File

@@ -316,6 +316,13 @@ suite_xeon = {
],
}
# Add Intel XPU tests
# Maps the suite name "per-commit-xpu" to the list of test files it contains;
# the dict is merged into `suites` further down via suites.update(suite_xpu).
suite_xpu = {
    "per-commit-xpu": [
        TestFile("xpu/test_intel_xpu_backend.py"),
    ],
}
# Add Ascend NPU tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_ascend = {
@@ -341,6 +348,7 @@ suite_ascend = {
# Merge each platform-specific suite mapping into the shared `suites` registry.
# NOTE(review): `suites` is presumably a dict; if so, update() overwrites
# existing keys, so a later mapping wins should two suites share a name —
# keep the order unless that is confirmed not to matter.
suites.update(suite_amd)
suites.update(suite_xeon)
suites.update(suite_ascend)
suites.update(suite_xpu)
def auto_partition(files, rank, size):

View File

@@ -0,0 +1,60 @@
"""
Usage:
python3 -m unittest test_intel_xpu_backend.TestIntelXPUBackend.test_latency_qwen_model
"""
import os
import unittest
from functools import wraps
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
CustomTestCase,
is_in_ci,
run_bench_one_batch,
)
def intel_xpu_benchmark(extra_args=None, min_throughput=None):
    """Build a decorator that turns a model-returning test into an XPU benchmark.

    The decorated method is expected to return the model to benchmark. The
    wrapper passes that model, together with a fixed set of XPU command-line
    arguments followed by ``extra_args``, to ``run_bench_one_batch``, prints
    the three metrics it reports and, when running in CI, checks the decode
    throughput against ``min_throughput``.

    Args:
        extra_args: Optional iterable of additional command-line argument
            strings appended after the common ones. Lists, tuples and other
            iterables are accepted; the values are copied once when the
            decorator is created.
        min_throughput: Optional lower bound for the decode throughput. It is
            enforced only when ``is_in_ci()`` is true; ``None`` disables it.

    Returns:
        A decorator that replaces ``test_func(self)`` with the benchmark
        wrapper. The wrapper itself returns ``None``.

    Raises:
        TypeError: If ``extra_args`` is a single string instead of an
            iterable of separate argument strings.
    """
    # A bare string would be split into single characters by list(); reject it
    # up front with the same exception type list concatenation would raise.
    if isinstance(extra_args, str):
        raise TypeError(
            "extra_args must be an iterable of argument strings, not a single str"
        )
    # Normalise once at decoration time so tuples/generators work as well as
    # lists, and so a bad value fails at import rather than mid-test.
    extra = list(extra_args) if extra_args is not None else []

    def decorator(test_func):
        @wraps(test_func)
        def wrapper(self):
            common_args = [
                "--disable-radix",
                "--trust-remote-code",
                "--mem-fraction-static",
                "0.3",
                "--batch-size",
                "1",
                "--device",
                "xpu",
            ]
            full_args = common_args + extra

            # The test body only selects the model; the benchmark runs here.
            model = test_func(self)
            prefill_latency, decode_throughput, decode_latency = run_bench_one_batch(
                model, full_args
            )

            print(f"{model=}")
            print(f"{prefill_latency=}")
            print(f"{decode_throughput=}")
            print(f"{decode_latency=}")

            # The threshold is checked only in CI, as in the original condition.
            if is_in_ci() and min_throughput is not None:
                self.assertGreater(
                    decode_throughput,
                    min_throughput,
                    msg=(
                        f"decode throughput for {model} is not above the "
                        f"required minimum of {min_throughput}"
                    ),
                )

        return wrapper

    return decorator
class TestIntelXPUBackend(CustomTestCase):
    """Benchmark test cases that run on the ``xpu`` device.

    Each test method only returns the model to benchmark; the
    ``intel_xpu_benchmark`` decorator performs the run and the checks.
    """

    @intel_xpu_benchmark(min_throughput=10)
    def test_latency_qwen_model(self):
        # The returned model name is consumed by the decorator's wrapper,
        # which benchmarks it and, in CI, requires decode throughput > 10.
        return DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()