[CI] Refactor multi-node CI (#3487)

### What this PR does / why we need it? Refactor the multi-machine CI use case. The purpose of this PR is to increase the ease of adding multi-machine CI use cases, allowing developers to add multi-machine cluster model testing use cases (including PD separation) by simply adding a new YAML configuration file. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-10-17 09:04:31 +08:00
parent ccb6fb9ec1
commit 4c4a8458a5
18 changed files with 632 additions and 437 deletions
--- a/tests/e2e/nightly/multi_node/test_multi_node.py
+++ b/tests/e2e/nightly/multi_node/test_multi_node.py
@@ -0,0 +1,30 @@
+from tests.e2e.conftest import RemoteOpenAIServer
+from tests.e2e.nightly.multi_node.config.multi_node_config import (
+    DISAGGREGATED_PREFILL_PROXY_SCRIPT, MultiNodeConfig)
+
+
+def test_multi_node() -> None:
+    config = MultiNodeConfig.from_yaml()
+    env_dict = config.envs
+    # perf_cmd = config.perf_cmd
+    # acc_cmd = config.acc_cmd
+    server_port = config.server_port if not config.disaggregated_prefill else config.proxy_port
+    server_host = config.cluster_ips[0]
+    with config.launch_server_proxy(DISAGGREGATED_PREFILL_PROXY_SCRIPT):
+        with RemoteOpenAIServer(
+                model=config.model,
+                vllm_serve_args=config.server_cmd,
+                server_port=server_port,
+                server_host=server_host,
+                env_dict=env_dict,
+                auto_port=False,
+                max_wait_seconds=2000,
+        ) as remote_server:
+            # base_url = remote_server.url_root
+            if config.is_master:
+                pass
+                # TODO: enable perf and acc test
+                # subprocess.run(perf_cmd, check=True)
+                # subprocess.run(acc_cmd, check=True)
+            else:
+                remote_server.hang_until_terminated()