[CI] improve disaggregation CI. (#11264)

Signed-off-by: Shangming Cai <csmthu@gmail.com>
Co-authored-by: Shangming Cai <csmthu@gmail.com>
2025-10-08 21:40:56 +08:00
parent e3bb7f5ae6
commit c882b5ae75
9 changed files with 107 additions and 68 deletions
--- a/test/srt/hicache/test_disaggregation_hicache.py
+++ b/test/srt/hicache/test_disaggregation_hicache.py
@@ -70,11 +70,8 @@ class DisaggregationHiCacheBase(TestDisaggregationBase):
            "wait_complete",
            "--mem-fraction-static",
            "0.8",
-            "--disaggregation-ib-device",
-            "mlx5_roce0",
-            "--disaggregation-transfer-backend",
-            "mooncake",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        env = {
            **os.environ,
            "SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
@@ -148,11 +145,8 @@ class TestDisaggregationPrefillWithHiCache(DisaggregationHiCacheBase):
            "0.8",
            "--base-gpu-id",
            "1",
-            "--disaggregation-ib-device",
-            "mlx5_roce0",
-            "--disaggregation-transfer-backend",
-            "mooncake",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        env = {
            **os.environ,
            "SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
@@ -201,10 +195,6 @@ class TestDisaggregationDecodeWithHiCache(DisaggregationHiCacheBase):
            "0.8",
            "--base-gpu-id",
            "1",
-            "--disaggregation-ib-device",
-            "mlx5_roce0",
-            "--disaggregation-transfer-backend",
-            "mooncake",
            "--disaggregation-decode-enable-offload-kvcache",
            "--hicache-ratio",
            "1.2",
@@ -215,6 +205,7 @@ class TestDisaggregationDecodeWithHiCache(DisaggregationHiCacheBase):
            "--hicache-storage-prefetch-policy",
            "wait_complete",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        env = {
            **os.environ,
            "SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
--- a/test/srt/hicache/test_hicache_storage_mooncake_backend.py
+++ b/test/srt/hicache/test_hicache_storage_mooncake_backend.py
@@ -15,6 +15,7 @@ import requests
 from test_hicache_storage_file_backend import HiCacheStorageBaseMixin

 from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
+from sglang.test.test_disaggregation_utils import get_rdma_devices_args
 from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    CustomTestCase,
@@ -192,7 +193,7 @@ class HiCacheStorageMooncakeBackendBaseMixin(HiCacheStorageBaseMixin):
        """Get additional server arguments specific to configuration - override in subclasses"""

        server_args = {
-            "--tp-size": 1,
+            "--tp-size": 2,
            "--hicache-ratio": 2,
            "--hicache-storage-backend": "mooncake",
        }
@@ -202,7 +203,7 @@ class HiCacheStorageMooncakeBackendBaseMixin(HiCacheStorageBaseMixin):
            "MOONCAKE_MASTER": f"127.0.0.1:{cls.mooncake_master_port}",
            "MOONCAKE_PROTOCOL": "rdma",
            "MC_MS_AUTO_DISC": "0",
-            "MOONCAKE_DEVICE": "mlx5_roce0,mlx5_roce1",
+            "MOONCAKE_DEVICE": get_rdma_devices_args(),
            "MOONCAKE_TE_META_DATA_SERVER": f"http://127.0.0.1:{cls.mooncake_metadata_port}/metadata",
            "MOONCAKE_GLOBAL_SEGMENT_SIZE": "4294967296",  # 4 GiB
        }
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -134,11 +134,13 @@ suites = {
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("rl/test_update_weights_from_distributed.py", 103),
        TestFile("test_data_parallelism.py", 73),
+        TestFile("test_disaggregation.py", 499),
        TestFile("test_dp_attention.py", 594),
        TestFile("test_load_weights_from_remote_instance.py", 72),
        TestFile("test_patch_torch.py", 19),
        TestFile("test_release_memory_occupation.py", 257),
        TestFile("hicache/test_hicache_storage_file_backend.py", 200),
+        TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
        TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
    ],
    "per-commit-4-gpu": [
@@ -149,9 +151,7 @@ suites = {
        TestFile("test_multi_instance_release_memory_occupation.py", 64),
    ],
    "per-commit-8-gpu": [
-        TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
        TestFile("lora/test_lora_llama4.py", 400),
-        TestFile("test_disaggregation.py", 499),
        TestFile("test_disaggregation_dp_attention.py", 155),
        TestFile("test_disaggregation_different_tp.py", 600),
        TestFile("test_disaggregation_pp.py", 140),
--- a/test/srt/test_disaggregation.py
+++ b/test/srt/test_disaggregation.py
@@ -40,10 +40,9 @@ class TestDisaggregationAccuracy(TestDisaggregationBase):
            "--disaggregation-mode",
            "prefill",
            "--tp",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
+            "1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -58,12 +57,11 @@ class TestDisaggregationAccuracy(TestDisaggregationBase):
            "--disaggregation-mode",
            "decode",
            "--tp",
-            "2",
+            "1",
            "--base-gpu-id",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce2,mlx5_roce3",
+            "1",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
@@ -171,10 +169,9 @@ class TestDisaggregationMooncakeFailure(TestDisaggregationBase):
            "--disaggregation-mode",
            "prefill",
            "--tp",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
+            "1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -189,12 +186,11 @@ class TestDisaggregationMooncakeFailure(TestDisaggregationBase):
            "--disaggregation-mode",
            "decode",
            "--tp",
-            "2",
+            "1",
            "--base-gpu-id",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce2,mlx5_roce3",
+            "1",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
@@ -270,10 +266,9 @@ class TestDisaggregationMooncakeSpec(TestDisaggregationBase):
            "--disaggregation-mode",
            "prefill",
            "--tp",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
+            "1",
        ] + cls.spec_args
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -288,12 +283,11 @@ class TestDisaggregationMooncakeSpec(TestDisaggregationBase):
            "--disaggregation-mode",
            "decode",
            "--tp",
-            "2",
+            "1",
            "--base-gpu-id",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce2,mlx5_roce3",
+            "1",
        ] + cls.spec_args
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
@@ -346,10 +340,9 @@ class TestDisaggregationSimulatedRetract(TestDisaggregationBase):
            "--disaggregation-mode",
            "prefill",
            "--tp",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
+            "1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -364,12 +357,11 @@ class TestDisaggregationSimulatedRetract(TestDisaggregationBase):
            "--disaggregation-mode",
            "decode",
            "--tp",
-            "2",
+            "1",
            "--base-gpu-id",
-            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce2,mlx5_roce3",
+            "1",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
--- a/test/srt/test_disaggregation_different_tp.py
+++ b/test/srt/test_disaggregation_different_tp.py
@@ -41,9 +41,8 @@ class TestDisaggregationMooncakePrefillLargerTP(TestDisaggregationBase):
            "prefill",
            "--tp",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -61,9 +60,8 @@ class TestDisaggregationMooncakePrefillLargerTP(TestDisaggregationBase):
            "2",
            "--base-gpu-id",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce4,mlx5_roce5",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
@@ -115,9 +113,8 @@ class TestDisaggregationMooncakeDecodeLargerTP(TestDisaggregationBase):
            "prefill",
            "--tp",
            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -135,9 +132,8 @@ class TestDisaggregationMooncakeDecodeLargerTP(TestDisaggregationBase):
            "4",
            "--base-gpu-id",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce4,mlx5_roce5",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
@@ -189,9 +185,8 @@ class TestDisaggregationMooncakeMHAPrefillLargerTP(TestDisaggregationBase):
            "prefill",
            "--tp",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -209,9 +204,8 @@ class TestDisaggregationMooncakeMHAPrefillLargerTP(TestDisaggregationBase):
            "2",
            "--base-gpu-id",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce4,mlx5_roce5",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
@@ -263,9 +257,8 @@ class TestDisaggregationMooncakeMHADecodeLargerTP(TestDisaggregationBase):
            "prefill",
            "--tp",
            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -283,9 +276,8 @@ class TestDisaggregationMooncakeMHADecodeLargerTP(TestDisaggregationBase):
            "4",
            "--base-gpu-id",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce4,mlx5_roce5",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
--- a/test/srt/test_disaggregation_dp_attention.py
+++ b/test/srt/test_disaggregation_dp_attention.py
@@ -45,9 +45,8 @@ class TestDisaggregationDPAttention(TestDisaggregationBase):
            "--dp",
            "2",
            "--enable-dp-attention",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -68,9 +67,8 @@ class TestDisaggregationDPAttention(TestDisaggregationBase):
            "--enable-dp-attention",
            "--base-gpu-id",
            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce2,mlx5_roce3",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,
--- a/test/srt/test_disaggregation_pp.py
+++ b/test/srt/test_disaggregation_pp.py
@@ -37,10 +37,9 @@ class TestDisaggregationPPAccuracy(TestDisaggregationBase):
            "2",
            "--pp-size",
            "2",
-            "--disaggregation-ib-device",
-            "mlx5_roce0,mlx5_roce1",
            "--disable-overlap-schedule",
        ]
+        prefill_args += cls.transfer_backend + cls.rdma_devices
        cls.process_prefill = popen_launch_pd_server(
            cls.model,
            cls.prefill_url,
@@ -58,9 +57,8 @@ class TestDisaggregationPPAccuracy(TestDisaggregationBase):
            "2",
            "--base-gpu-id",
            "4",
-            "--disaggregation-ib-device",
-            "mlx5_roce4,mlx5_roce5",
        ]
+        decode_args += cls.transfer_backend + cls.rdma_devices
        cls.process_decode = popen_launch_pd_server(
            cls.model,
            cls.decode_url,