Fix port conflicts in CI (#11497)

2025-10-12 06:46:36 -07:00
parent 88e73ed048
commit 548a57b1f3
6 changed files with 34 additions and 336 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -79,7 +79,7 @@ suites = {
        TestFile("test_gpt_oss_1gpu.py", 600),
        TestFile("test_harmony_parser.py", 20),
        TestFile("test_hidden_states.py", 55),
-        TestFile("test_hybrid_attn_backend.py", 100),
+        TestFile("test_hybrid_attn_backend.py", 379),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
--- a/test/srt/test_mla_int8_deepseek_v3.py
+++ b/test/srt/test_mla_int8_deepseek_v3.py
@@ -22,7 +22,15 @@ class TestMLADeepseekV3ChannelInt8(CustomTestCase):
        cls.base_url = DEFAULT_URL_FOR_TEST
        other_args = ["--trust-remote-code"]
        if torch.cuda.is_available() and torch.version.cuda:
-            other_args.extend(["--enable-torch-compile", "--cuda-graph-max-bs", "2"])
+            other_args.extend(
+                [
+                    "--cuda-graph-max-bs",
+                    "16",
+                    "--enable-torch-compile",
+                    "--torch-compile-max-bs",
+                    "2",
+                ]
+            )
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
@@ -50,6 +58,7 @@ class TestMLADeepseekV3ChannelInt8(CustomTestCase):
        self.assertGreaterEqual(metrics["accuracy"], 0.61)


+@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
 class TestDeepseekV3MTPChannelInt8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
@@ -60,12 +69,11 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
            other_args.extend(
                [
                    "--cuda-graph-max-bs",
-                    "2",
-                    "--disable-radix",
+                    "16",
                    "--enable-torch-compile",
                    "--torch-compile-max-bs",
-                    "1",
+                    "2",
                    "--speculative-algorithm",
                    "EAGLE",
                    "--speculative-draft-model-path",
                    "sgl-project/sglang-ci-dsv3-channel-int8-test-NextN",
@@ -121,7 +128,15 @@ class TestMLADeepseekV3BlockInt8(CustomTestCase):
        cls.base_url = DEFAULT_URL_FOR_TEST
        other_args = ["--trust-remote-code"]
        if torch.cuda.is_available() and torch.version.cuda:
-            other_args.extend(["--enable-torch-compile", "--cuda-graph-max-bs", "2"])
+            other_args.extend(
+                [
+                    "--cuda-graph-max-bs",
+                    "16",
+                    "--enable-torch-compile",
+                    "--torch-compile-max-bs",
+                    "2",
+                ]
+            )
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
@@ -159,12 +174,11 @@ class TestDeepseekV3MTPBlockInt8(CustomTestCase):
            other_args.extend(
                [
                    "--cuda-graph-max-bs",
-                    "2",
-                    "--disable-radix",
+                    "16",
                    "--enable-torch-compile",
                    "--torch-compile-max-bs",
-                    "1",
+                    "2",
                    "--speculative-algorithm",
                    "EAGLE",
                    "--speculative-num-steps",
                    "2",