Init attention backend for Intel XPU (#10656)

Co-authored-by: guangyey <guangye.yu@intel.com>
Co-authored-by: DiweiSun <105627594+DiweiSun@users.noreply.github.com>
This commit is contained in:
Meng, Hengyu
2025-10-21 11:41:28 +08:00
committed by GitHub
parent fb6cc7b000
commit b113c72e7a
18 changed files with 1210 additions and 26 deletions

View File

@@ -8,6 +8,7 @@ import unittest
from functools import wraps
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
CustomTestCase,
is_in_ci,
@@ -55,6 +56,10 @@ class TestIntelXPUBackend(CustomTestCase):
# Returns the small Qwen test-model constant imported from sglang.test.test_utils.
# NOTE(review): any decorator on this method lies outside this diff hunk; presumably
# it is wrapped like its sibling test and the wrapper consumes the returned model
# name -- confirm against the full file.
def test_latency_qwen_model(self):
return DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
@intel_xpu_benchmark(
    [
        "--attention-backend",
        "intel_xpu",
        "--page-size",
        "128",
    ]
)
def test_attention_backend(self):
    """Select the base small test model for the ``intel_xpu`` attention backend case.

    The decorator receives the CLI-style arguments
    ``--attention-backend intel_xpu --page-size 128``.

    NOTE(review): presumably ``intel_xpu_benchmark`` consumes the returned
    model name to drive the actual run -- confirm against its definition.

    Returns:
        The ``DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE`` constant.
    """
    model_name = DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE
    return model_name
# Script entry point: hand control to the unittest runner when this file is
# executed directly rather than imported.
if __name__ == "__main__":
unittest.main()