[CI] improve disaggregation CI. (#11264)
Signed-off-by: Shangming Cai <csmthu@gmail.com> Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
@@ -128,6 +128,10 @@ class Envs:
|
|||||||
SGLANG_SIMULATE_ACC_METHOD = EnvStr("multinomial")
|
SGLANG_SIMULATE_ACC_METHOD = EnvStr("multinomial")
|
||||||
SGLANG_TORCH_PROFILER_DIR = EnvStr("/tmp")
|
SGLANG_TORCH_PROFILER_DIR = EnvStr("/tmp")
|
||||||
|
|
||||||
|
# Test: pd-disaggregation
|
||||||
|
SGLANG_TEST_PD_DISAGG_BACKEND = EnvStr("mooncake")
|
||||||
|
SGLANG_TEST_PD_DISAGG_DEVICES = EnvStr(None)
|
||||||
|
|
||||||
# Model Parallel
|
# Model Parallel
|
||||||
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER = EnvBool(True)
|
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER = EnvBool(True)
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,17 @@
|
|||||||
|
import os
|
||||||
import time
|
import time
|
||||||
|
import warnings
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from sglang.srt.environ import envs
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
CustomTestCase,
|
CustomTestCase,
|
||||||
|
is_in_ci,
|
||||||
popen_with_error_check,
|
popen_with_error_check,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -27,6 +31,24 @@ class TestDisaggregationBase(CustomTestCase):
|
|||||||
print(f"{cls.base_host=} {cls.lb_port=} {cls.prefill_port=} {cls.decode_port=}")
|
print(f"{cls.base_host=} {cls.lb_port=} {cls.prefill_port=} {cls.decode_port=}")
|
||||||
cls.process_lb, cls.process_decode, cls.process_prefill = None, None, None
|
cls.process_lb, cls.process_decode, cls.process_prefill = None, None, None
|
||||||
|
|
||||||
|
# config transfer backend and rdma devices
|
||||||
|
if is_in_ci():
|
||||||
|
cls.transfer_backend = ["--disaggregation-transfer-backend", "mooncake"]
|
||||||
|
cls.rdma_devices = ["--disaggregation-ib-device", get_rdma_devices_args()]
|
||||||
|
else:
|
||||||
|
cls.transfer_backend = [
|
||||||
|
"--disaggregation-transfer-backend",
|
||||||
|
envs.SGLANG_TEST_PD_DISAGG_BACKEND.get(),
|
||||||
|
]
|
||||||
|
cls.rdma_devices = [
|
||||||
|
"--disaggregation-ib-device",
|
||||||
|
envs.SGLANG_TEST_PD_DISAGG_DEVICES.get(),
|
||||||
|
]
|
||||||
|
if cls.rdma_devices[1] is None:
|
||||||
|
cls.rdma_devices = []
|
||||||
|
msg = "No RDMA devices specified for disaggregation test, using default settings."
|
||||||
|
warnings.warn(msg)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def launch_lb(cls):
|
def launch_lb(cls):
|
||||||
lb_command = [
|
lb_command = [
|
||||||
@@ -75,3 +97,44 @@ class TestDisaggregationBase(CustomTestCase):
|
|||||||
|
|
||||||
# wait for 5 seconds
|
# wait for 5 seconds
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
|
||||||
|
def get_rdma_devices_args():
    """Return the comma-separated RDMA device list for the visible GPUs.

    Reads ``CUDA_VISIBLE_DEVICES`` and maps each visible GPU index to the
    ``mlx5_roce<N>`` device carrying the same index, restricted to the group
    of four consecutive indices (0-3, 4-7, ...) that contains the smallest
    visible GPU index. Indices outside that group are skipped with a warning.

    Returns:
        A string such as ``"mlx5_roce0,mlx5_roce1"``. The fallback value
        ``"mlx5_roce0,mlx5_roce4"`` is returned when ``CUDA_VISIBLE_DEVICES``
        is unset or empty, is not a list of integers, contains a negative
        index, lists more than four GPUs, or yields no device at all.
    """
    default_devices = "mlx5_roce0,mlx5_roce4"

    # 1. Get visible GPU indices
    cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
    if not cuda_visible_devices:
        warnings.warn("CUDA_VISIBLE_DEVICES is not set. Using default RDMA devices.")
        return default_devices

    try:
        # Convert to list of integers (handling possible spaces and empty strings)
        gpu_indices = [
            int(idx.strip()) for idx in cuda_visible_devices.split(",") if idx.strip()
        ]
    except ValueError:
        warnings.warn(f"Invalid CUDA_VISIBLE_DEVICES format: {cuda_visible_devices}")
        return default_devices

    if not gpu_indices or len(gpu_indices) > 4:
        return default_devices

    # A negative index (e.g. "-1", commonly used to hide every GPU) parses as
    # a valid int; without this guard it would be turned into a bogus device
    # name such as "mlx5_roce-1".
    if any(idx < 0 for idx in gpu_indices):
        warnings.warn(f"Invalid CUDA_VISIBLE_DEVICES format: {cuda_visible_devices}")
        return default_devices

    # 2. Calculate base RDMA index group (each group of 4 GPUs uses consecutive devices)
    base_rdma_group = min(gpu_indices) // 4 * 4

    # 3. Generate RDMA device names
    rdma_devices = []
    for gpu_idx in gpu_indices:
        # Validate GPU index within expected range
        if gpu_idx < base_rdma_group or gpu_idx >= base_rdma_group + 4:
            warnings.warn(
                f"GPU index {gpu_idx} is outside expected group {base_rdma_group}-{base_rdma_group+3}"
            )
            continue

        # Inside the group the RDMA device index equals the GPU index:
        # base_rdma_group is a multiple of 4, so base + (gpu_idx % 4) == gpu_idx.
        rdma_devices.append(f"mlx5_roce{gpu_idx}")

    if not rdma_devices:
        return default_devices

    return ",".join(rdma_devices)
|
||||||
|
|||||||
@@ -70,11 +70,8 @@ class DisaggregationHiCacheBase(TestDisaggregationBase):
|
|||||||
"wait_complete",
|
"wait_complete",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.8",
|
"0.8",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0",
|
|
||||||
"--disaggregation-transfer-backend",
|
|
||||||
"mooncake",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
env = {
|
env = {
|
||||||
**os.environ,
|
**os.environ,
|
||||||
"SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
|
"SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
|
||||||
@@ -148,11 +145,8 @@ class TestDisaggregationPrefillWithHiCache(DisaggregationHiCacheBase):
|
|||||||
"0.8",
|
"0.8",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"1",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0",
|
|
||||||
"--disaggregation-transfer-backend",
|
|
||||||
"mooncake",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
env = {
|
env = {
|
||||||
**os.environ,
|
**os.environ,
|
||||||
"SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
|
"SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
|
||||||
@@ -201,10 +195,6 @@ class TestDisaggregationDecodeWithHiCache(DisaggregationHiCacheBase):
|
|||||||
"0.8",
|
"0.8",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"1",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0",
|
|
||||||
"--disaggregation-transfer-backend",
|
|
||||||
"mooncake",
|
|
||||||
"--disaggregation-decode-enable-offload-kvcache",
|
"--disaggregation-decode-enable-offload-kvcache",
|
||||||
"--hicache-ratio",
|
"--hicache-ratio",
|
||||||
"1.2",
|
"1.2",
|
||||||
@@ -215,6 +205,7 @@ class TestDisaggregationDecodeWithHiCache(DisaggregationHiCacheBase):
|
|||||||
"--hicache-storage-prefetch-policy",
|
"--hicache-storage-prefetch-policy",
|
||||||
"wait_complete",
|
"wait_complete",
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
env = {
|
env = {
|
||||||
**os.environ,
|
**os.environ,
|
||||||
"SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
|
"SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR": cls.temp_dir,
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import requests
|
|||||||
from test_hicache_storage_file_backend import HiCacheStorageBaseMixin
|
from test_hicache_storage_file_backend import HiCacheStorageBaseMixin
|
||||||
|
|
||||||
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
||||||
|
from sglang.test.test_disaggregation_utils import get_rdma_devices_args
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||||
CustomTestCase,
|
CustomTestCase,
|
||||||
@@ -192,7 +193,7 @@ class HiCacheStorageMooncakeBackendBaseMixin(HiCacheStorageBaseMixin):
|
|||||||
"""Get additional server arguments specific to configuration - override in subclasses"""
|
"""Get additional server arguments specific to configuration - override in subclasses"""
|
||||||
|
|
||||||
server_args = {
|
server_args = {
|
||||||
"--tp-size": 1,
|
"--tp-size": 2,
|
||||||
"--hicache-ratio": 2,
|
"--hicache-ratio": 2,
|
||||||
"--hicache-storage-backend": "mooncake",
|
"--hicache-storage-backend": "mooncake",
|
||||||
}
|
}
|
||||||
@@ -202,7 +203,7 @@ class HiCacheStorageMooncakeBackendBaseMixin(HiCacheStorageBaseMixin):
|
|||||||
"MOONCAKE_MASTER": f"127.0.0.1:{cls.mooncake_master_port}",
|
"MOONCAKE_MASTER": f"127.0.0.1:{cls.mooncake_master_port}",
|
||||||
"MOONCAKE_PROTOCOL": "rdma",
|
"MOONCAKE_PROTOCOL": "rdma",
|
||||||
"MC_MS_AUTO_DISC": "0",
|
"MC_MS_AUTO_DISC": "0",
|
||||||
"MOONCAKE_DEVICE": "mlx5_roce0,mlx5_roce1",
|
"MOONCAKE_DEVICE": get_rdma_devices_args(),
|
||||||
"MOONCAKE_TE_META_DATA_SERVER": f"http://127.0.0.1:{cls.mooncake_metadata_port}/metadata",
|
"MOONCAKE_TE_META_DATA_SERVER": f"http://127.0.0.1:{cls.mooncake_metadata_port}/metadata",
|
||||||
"MOONCAKE_GLOBAL_SEGMENT_SIZE": "4294967296", # 4 GiB
|
"MOONCAKE_GLOBAL_SEGMENT_SIZE": "4294967296", # 4 GiB
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -134,11 +134,13 @@ suites = {
|
|||||||
TestFile("lora/test_lora_tp.py", 116),
|
TestFile("lora/test_lora_tp.py", 116),
|
||||||
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
||||||
TestFile("test_data_parallelism.py", 73),
|
TestFile("test_data_parallelism.py", 73),
|
||||||
|
TestFile("test_disaggregation.py", 499),
|
||||||
TestFile("test_dp_attention.py", 594),
|
TestFile("test_dp_attention.py", 594),
|
||||||
TestFile("test_load_weights_from_remote_instance.py", 72),
|
TestFile("test_load_weights_from_remote_instance.py", 72),
|
||||||
TestFile("test_patch_torch.py", 19),
|
TestFile("test_patch_torch.py", 19),
|
||||||
TestFile("test_release_memory_occupation.py", 257),
|
TestFile("test_release_memory_occupation.py", 257),
|
||||||
TestFile("hicache/test_hicache_storage_file_backend.py", 200),
|
TestFile("hicache/test_hicache_storage_file_backend.py", 200),
|
||||||
|
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
|
||||||
TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
|
TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
|
||||||
],
|
],
|
||||||
"per-commit-4-gpu": [
|
"per-commit-4-gpu": [
|
||||||
@@ -149,9 +151,7 @@ suites = {
|
|||||||
TestFile("test_multi_instance_release_memory_occupation.py", 64),
|
TestFile("test_multi_instance_release_memory_occupation.py", 64),
|
||||||
],
|
],
|
||||||
"per-commit-8-gpu": [
|
"per-commit-8-gpu": [
|
||||||
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
|
|
||||||
TestFile("lora/test_lora_llama4.py", 400),
|
TestFile("lora/test_lora_llama4.py", 400),
|
||||||
TestFile("test_disaggregation.py", 499),
|
|
||||||
TestFile("test_disaggregation_dp_attention.py", 155),
|
TestFile("test_disaggregation_dp_attention.py", 155),
|
||||||
TestFile("test_disaggregation_different_tp.py", 600),
|
TestFile("test_disaggregation_different_tp.py", 600),
|
||||||
TestFile("test_disaggregation_pp.py", 140),
|
TestFile("test_disaggregation_pp.py", 140),
|
||||||
|
|||||||
@@ -40,10 +40,9 @@ class TestDisaggregationAccuracy(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -58,12 +57,11 @@ class TestDisaggregationAccuracy(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"decode",
|
"decode",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce2,mlx5_roce3",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
@@ -171,10 +169,9 @@ class TestDisaggregationMooncakeFailure(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -189,12 +186,11 @@ class TestDisaggregationMooncakeFailure(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"decode",
|
"decode",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce2,mlx5_roce3",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
@@ -270,10 +266,9 @@ class TestDisaggregationMooncakeSpec(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
] + cls.spec_args
|
] + cls.spec_args
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -288,12 +283,11 @@ class TestDisaggregationMooncakeSpec(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"decode",
|
"decode",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce2,mlx5_roce3",
|
|
||||||
] + cls.spec_args
|
] + cls.spec_args
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
@@ -346,10 +340,9 @@ class TestDisaggregationSimulatedRetract(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -364,12 +357,11 @@ class TestDisaggregationSimulatedRetract(TestDisaggregationBase):
|
|||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
"decode",
|
"decode",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"1",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"2",
|
"1",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce2,mlx5_roce3",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
|
|||||||
@@ -41,9 +41,8 @@ class TestDisaggregationMooncakePrefillLargerTP(TestDisaggregationBase):
|
|||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -61,9 +60,8 @@ class TestDisaggregationMooncakePrefillLargerTP(TestDisaggregationBase):
|
|||||||
"2",
|
"2",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce4,mlx5_roce5",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
@@ -115,9 +113,8 @@ class TestDisaggregationMooncakeDecodeLargerTP(TestDisaggregationBase):
|
|||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"2",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -135,9 +132,8 @@ class TestDisaggregationMooncakeDecodeLargerTP(TestDisaggregationBase):
|
|||||||
"4",
|
"4",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce4,mlx5_roce5",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
@@ -189,9 +185,8 @@ class TestDisaggregationMooncakeMHAPrefillLargerTP(TestDisaggregationBase):
|
|||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -209,9 +204,8 @@ class TestDisaggregationMooncakeMHAPrefillLargerTP(TestDisaggregationBase):
|
|||||||
"2",
|
"2",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce4,mlx5_roce5",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
@@ -263,9 +257,8 @@ class TestDisaggregationMooncakeMHADecodeLargerTP(TestDisaggregationBase):
|
|||||||
"prefill",
|
"prefill",
|
||||||
"--tp",
|
"--tp",
|
||||||
"2",
|
"2",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -283,9 +276,8 @@ class TestDisaggregationMooncakeMHADecodeLargerTP(TestDisaggregationBase):
|
|||||||
"4",
|
"4",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce4,mlx5_roce5",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
|
|||||||
@@ -45,9 +45,8 @@ class TestDisaggregationDPAttention(TestDisaggregationBase):
|
|||||||
"--dp",
|
"--dp",
|
||||||
"2",
|
"2",
|
||||||
"--enable-dp-attention",
|
"--enable-dp-attention",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -68,9 +67,8 @@ class TestDisaggregationDPAttention(TestDisaggregationBase):
|
|||||||
"--enable-dp-attention",
|
"--enable-dp-attention",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"2",
|
"2",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce2,mlx5_roce3",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
|
|||||||
@@ -37,10 +37,9 @@ class TestDisaggregationPPAccuracy(TestDisaggregationBase):
|
|||||||
"2",
|
"2",
|
||||||
"--pp-size",
|
"--pp-size",
|
||||||
"2",
|
"2",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce0,mlx5_roce1",
|
|
||||||
"--disable-overlap-schedule",
|
"--disable-overlap-schedule",
|
||||||
]
|
]
|
||||||
|
prefill_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_prefill = popen_launch_pd_server(
|
cls.process_prefill = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.prefill_url,
|
cls.prefill_url,
|
||||||
@@ -58,9 +57,8 @@ class TestDisaggregationPPAccuracy(TestDisaggregationBase):
|
|||||||
"2",
|
"2",
|
||||||
"--base-gpu-id",
|
"--base-gpu-id",
|
||||||
"4",
|
"4",
|
||||||
"--disaggregation-ib-device",
|
|
||||||
"mlx5_roce4,mlx5_roce5",
|
|
||||||
]
|
]
|
||||||
|
decode_args += cls.transfer_backend + cls.rdma_devices
|
||||||
cls.process_decode = popen_launch_pd_server(
|
cls.process_decode = popen_launch_pd_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.decode_url,
|
cls.decode_url,
|
||||||
|
|||||||
Reference in New Issue
Block a user