[CI][XPU] enable sglang CI on Intel XPU (#9493)

Co-authored-by: huaiyuzh <huaiyu.zheng@intel.com>
Co-authored-by: Ma Mingfei <mingfei.ma@intel.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
2025-10-16 08:13:19 +08:00
parent baf277a9bf
commit 4c03dbaaef
6 changed files with 266 additions and 2 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -316,6 +316,13 @@ suite_xeon = {
    ],
 }

+# Intel XPU tests; merged into `suites` below as the "per-commit-xpu" suite.
+suite_xpu = {
+    "per-commit-xpu": [
+        TestFile("xpu/test_intel_xpu_backend.py"),
+    ],
+}
+
 # Add Ascend NPU tests
 # NOTE: please sort the test cases alphabetically by the test file name
 suite_ascend = {
@@ -341,6 +348,7 @@ suite_ascend = {
 suites.update(suite_amd)
 suites.update(suite_xeon)
 suites.update(suite_ascend)
+suites.update(suite_xpu)


 def auto_partition(files, rank, size):
--- a/test/srt/xpu/test_intel_xpu_backend.py
+++ b/test/srt/xpu/test_intel_xpu_backend.py
@@ -0,0 +1,60 @@
+"""
+Usage:
+python3 -m unittest test_intel_xpu_backend.TestIntelXPUBackend.test_latency_qwen_model
+"""
+
+import os
+import unittest
+from functools import wraps
+
+from sglang.test.test_utils import (
+    DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
+    CustomTestCase,
+    is_in_ci,
+    run_bench_one_batch,
+)
+
+
+def intel_xpu_benchmark(extra_args=None, min_throughput=None):
+    """Build a decorator that benchmarks the model a test returns on XPU.

+    The wrapped test is called to obtain the model. That model is run
+    through ``run_bench_one_batch`` with the shared XPU flags followed by
+    ``extra_args``, the three metrics are printed, and when running in CI
+    decode throughput must exceed ``min_throughput`` unless it is ``None``.
+    """

+    def decorator(test_func):
+        @wraps(test_func)
+        def wrapper(self):
+            xpu_flags = ["--disable-radix", "--trust-remote-code"]
+            xpu_flags += ["--mem-fraction-static", "0.3"]
+            xpu_flags += ["--batch-size", "1", "--device", "xpu"]
+            full_args = xpu_flags + (extra_args or [])

+            model = test_func(self)
+            metrics = run_bench_one_batch(model, full_args)
+            prefill_latency, decode_throughput, decode_latency = metrics

+            print(f"{model=}")
+            print(f"{prefill_latency=}")
+            print(f"{decode_throughput=}")
+            print(f"{decode_latency=}")

+            if is_in_ci() and min_throughput is not None:
+                self.assertGreater(decode_throughput, min_throughput)

+        return wrapper

+    return decorator
+
+
+class TestIntelXPUBackend(CustomTestCase):
+    # Each test only names a model; @intel_xpu_benchmark runs and checks it.
+    @intel_xpu_benchmark(min_throughput=10)
+    def test_latency_qwen_model(self):
+        return DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script