Sync CUDA graph runners (#6976)

This commit is contained in:
Lianmin Zheng
2025-06-08 16:12:25 -07:00
committed by GitHub
parent 3712abfaf9
commit 0c1f03a23d
5 changed files with 56 additions and 49 deletions

View File

@@ -23,6 +23,7 @@ from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci,
popen_launch_server,
run_logprob_check,
)
@@ -578,6 +579,7 @@ class TestEAGLEServerTriton(TestEAGLEServer):
)
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
class TestEAGLEDraftExtend(CustomTestCase):
@classmethod
def setUpClass(cls):
@@ -669,6 +671,7 @@ class TestEAGLEDraftExtendFlashinfer(TestEAGLEDraftExtend):
cls.accept_len_threshold = 1.50
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
class TestEAGLEDraftExtendTriton(TestEAGLEDraftExtend):
@classmethod
def setUpClass(cls):
@@ -697,6 +700,7 @@ class TestEAGLEDraftExtendTriton(TestEAGLEDraftExtend):
cls.accept_len_threshold = 1.50
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
class TestEAGLEDraftExtendFlashinferMLA(TestEAGLEDraftExtend):
@classmethod
def setUpClass(cls):