Init attention backend for Intel XPU (#10656)
Co-authored-by: guangyey <guangye.yu@intel.com>
Co-authored-by: DiweiSun <105627594+DiweiSun@users.noreply.github.com>
This commit is contained in:
@@ -8,6 +8,7 @@ import unittest
|
||||
from functools import wraps
|
||||
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE,
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
|
||||
CustomTestCase,
|
||||
is_in_ci,
|
||||
@@ -55,6 +56,10 @@ class TestIntelXPUBackend(CustomTestCase):
|
||||
def test_latency_qwen_model(self):
    """Return the small Qwen model name used by this test case.

    NOTE(review): any decorator applied to this method lies outside the
    visible hunk, so how the returned name is consumed cannot be confirmed
    from here.
    """
    return DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
|
||||
|
||||
@intel_xpu_benchmark(["--attention-backend", "intel_xpu", "--page-size", "128"])
def test_attention_backend(self):
    """Return the small base model name for the attention-backend test.

    The method body only names the model. The decorator receives the extra
    argument list (``--attention-backend intel_xpu --page-size 128``).
    NOTE(review): presumably ``intel_xpu_benchmark`` runs the benchmark with
    these flags against the returned model; confirm against the decorator's
    definition, which is not visible here.
    """
    return DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE
|
||||
|
||||
|
||||
# Script entry point: hand control to unittest's command-line runner when
# this module is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user