[CI][XPU]enable sglang CI on Intel XPU (#9493)
Co-authored-by: huaiyuzh <huaiyu.zheng@intel.com> Co-authored-by: Ma Mingfei <mingfei.ma@intel.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
@@ -316,6 +316,13 @@ suite_xeon = {
|
||||
],
|
||||
}
|
||||
|
||||
# Add Intel XPU tests
|
||||
# Suite run for every commit on Intel XPU machines.
# NOTE(review): the path is presumably resolved relative to test/srt — confirm
# against how TestFile entries are consumed by the runner.
suite_xpu = {
    "per-commit-xpu": [TestFile("xpu/test_intel_xpu_backend.py")],
}
|
||||
|
||||
# Add Ascend NPU tests
|
||||
# NOTE: please sort the test cases alphabetically by the test file name
|
||||
suite_ascend = {
|
||||
@@ -341,6 +348,7 @@ suite_ascend = {
|
||||
suites.update(suite_amd)
|
||||
suites.update(suite_xeon)
|
||||
suites.update(suite_ascend)
|
||||
suites.update(suite_xpu)
|
||||
|
||||
|
||||
def auto_partition(files, rank, size):
|
||||
|
||||
60
test/srt/xpu/test_intel_xpu_backend.py
Normal file
60
test/srt/xpu/test_intel_xpu_backend.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
Usage:
|
||||
python3 -m unittest test_intel_xpu_backend.TestIntelXPUBackend.test_latency_qwen_model
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
from functools import wraps
|
||||
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
|
||||
CustomTestCase,
|
||||
is_in_ci,
|
||||
run_bench_one_batch,
|
||||
)
|
||||
|
||||
|
||||
def intel_xpu_benchmark(extra_args=None, min_throughput=None):
    """Build a decorator that turns a model-returning method into an XPU benchmark test.

    The decorated method is called to obtain a model identifier. That model is
    then passed to ``run_bench_one_batch`` together with a fixed set of
    command-line flags that select ``--device xpu``.

    Args:
        extra_args: Optional iterable of additional command-line flags that are
            appended after the common ones. Lists, tuples and other non-string
            iterables are accepted; ``None`` or an empty value adds nothing.
        min_throughput: Optional lower bound for the measured decode
            throughput. It is only enforced when ``is_in_ci()`` is truthy.

    Returns:
        A decorator. The wrapper it produces takes only ``self``, prints the
        measured values and returns ``None``.

    Raises:
        TypeError: Raised when the wrapped test runs and ``extra_args`` is a
            string (it would otherwise be split into single characters).
    """

    def decorator(test_func):
        @wraps(test_func)
        def wrapper(self):
            common_args = [
                "--disable-radix",
                "--trust-remote-code",
                "--mem-fraction-static",
                "0.3",
                "--batch-size",
                "1",
                "--device",
                "xpu",
            ]
            if isinstance(extra_args, str):
                # A bare string is almost certainly a mistake: unpacking it
                # would yield one argument per character.
                raise TypeError(
                    "extra_args must be an iterable of strings, not a single str"
                )
            # Unpacking (instead of list + list) lets callers pass a tuple or
            # any other iterable without hitting a TypeError.
            full_args = [*common_args, *(extra_args or ())]

            # The decorated method's only job is to name the model under test.
            model = test_func(self)
            prefill_latency, decode_throughput, decode_latency = run_bench_one_batch(
                model, full_args
            )

            print(f"{model=}")
            print(f"{prefill_latency=}")
            print(f"{decode_throughput=}")
            print(f"{decode_latency=}")

            # The throughput floor is checked only in CI, and only if one was given.
            if is_in_ci() and min_throughput is not None:
                self.assertGreater(decode_throughput, min_throughput)

        return wrapper

    return decorator
|
||||
|
||||
|
||||
class TestIntelXPUBackend(CustomTestCase):
    """Benchmark-driven tests for the Intel XPU backend."""

    # The method body only names the model to benchmark; the decorator runs
    # the benchmark and, in CI, requires decode throughput greater than 10.
    @intel_xpu_benchmark(min_throughput=10)
    def test_latency_qwen_model(self):
        return DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
|
||||
|
||||
|
||||
# Allow running this file directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user