Fix port conflicts in CI (#11497)

This commit is contained in:
Lianmin Zheng
2025-10-12 06:46:36 -07:00
committed by GitHub
parent 88e73ed048
commit 548a57b1f3
6 changed files with 34 additions and 336 deletions

View File

@@ -79,7 +79,7 @@ suites = {
TestFile("test_gpt_oss_1gpu.py", 600),
TestFile("test_harmony_parser.py", 20),
TestFile("test_hidden_states.py", 55),
TestFile("test_hybrid_attn_backend.py", 100),
TestFile("test_hybrid_attn_backend.py", 379),
TestFile("test_input_embeddings.py", 38),
TestFile("test_io_struct.py", 8),
TestFile("test_jinja_template_utils.py", 1),

View File

@@ -22,7 +22,15 @@ class TestMLADeepseekV3ChannelInt8(CustomTestCase):
cls.base_url = DEFAULT_URL_FOR_TEST
other_args = ["--trust-remote-code"]
if torch.cuda.is_available() and torch.version.cuda:
other_args.extend(["--enable-torch-compile", "--cuda-graph-max-bs", "2"])
other_args.extend(
[
"--cuda-graph-max-bs",
"16",
"--enable-torch-compile",
"--torch-compile-max-bs",
"2",
]
)
cls.process = popen_launch_server(
cls.model,
cls.base_url,
@@ -50,6 +58,7 @@ class TestMLADeepseekV3ChannelInt8(CustomTestCase):
self.assertGreaterEqual(metrics["accuracy"], 0.61)
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
class TestDeepseekV3MTPChannelInt8(CustomTestCase):
@classmethod
def setUpClass(cls):
@@ -60,12 +69,10 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
other_args.extend(
[
"--cuda-graph-max-bs",
"2",
"--disable-radix",
"16",
"--enable-torch-compile",
"--torch-compile-max-bs",
"1",
"--speculative-algorithm",
"2",
"--speculative-algorithm",
"EAGLE",
"--speculative-draft-model-path",
"sgl-project/sglang-ci-dsv3-channel-int8-test-NextN",
@@ -121,7 +128,15 @@ class TestMLADeepseekV3BlockInt8(CustomTestCase):
cls.base_url = DEFAULT_URL_FOR_TEST
other_args = ["--trust-remote-code"]
if torch.cuda.is_available() and torch.version.cuda:
other_args.extend(["--enable-torch-compile", "--cuda-graph-max-bs", "2"])
other_args.extend(
[
"--cuda-graph-max-bs",
"16",
"--enable-torch-compile",
"--torch-compile-max-bs",
"2",
]
)
cls.process = popen_launch_server(
cls.model,
cls.base_url,
@@ -159,12 +174,10 @@ class TestDeepseekV3MTPBlockInt8(CustomTestCase):
other_args.extend(
[
"--cuda-graph-max-bs",
"2",
"--disable-radix",
"16",
"--enable-torch-compile",
"--torch-compile-max-bs",
"1",
"--speculative-algorithm",
"2",
"--speculative-algorithm",
"EAGLE",
"--speculative-num-steps",
"2",