[1/N] Refactor nightly test structure (#5479)

### What this PR does / why we need it? This patch is the first in a series of refactoring changes that will clarify the directory structure of the nightly tests, rework the config retrieval logic, and streamline the workflow. This first step restructures the nightly test directory to make it more readable and logical. - vLLM version: v0.13.0 - vLLM main: 5326c89803 Signed-off-by: wangli <wangli858794774@gmail.com>
2025-12-30 19:03:02 +08:00
parent c85cc045f8
commit e760aae1df
59 changed files with 475 additions and 471 deletions
--- a/tests/e2e/nightly/multi_node/scripts/test_multi_node.py
+++ b/tests/e2e/nightly/multi_node/scripts/test_multi_node.py
@@ -0,0 +1,46 @@
+import pytest
+
+from tests.e2e.conftest import RemoteOpenAIServer
+from tests.e2e.nightly.multi_node.scripts.multi_node_config import (
+    MultiNodeConfigLoader, ProxyLauncher)
+from tools.aisbench import run_aisbench_cases
+
+
+@pytest.mark.asyncio
+async def test_multi_node() -> None:
+    config = MultiNodeConfigLoader.from_yaml()
+
+    with ProxyLauncher(
+            nodes=config.nodes,
+            disagg_cfg=config.disagg_cfg,
+            envs=config.envs,
+            proxy_port=config.proxy_port,
+            cur_index=config.cur_index,
+    ) as proxy:
+
+        with RemoteOpenAIServer(
+                model=config.model,
+                vllm_serve_args=config.server_cmd,
+                server_port=config.server_port,
+                server_host=config.master_ip,
+                env_dict=config.envs,
+                auto_port=False,
+                proxy_port=proxy.proxy_port,
+                disaggregated_prefill=config.disagg_cfg,
+                nodes_info=config.nodes,
+                max_wait_seconds=2800,
+        ) as server:
+
+            host, port = config.benchmark_endpoint
+
+            if config.is_master:
+                run_aisbench_cases(
+                    model=config.model,
+                    port=port,
+                    aisbench_cases=[config.acc_cmd, config.perf_cmd],
+                    host_ip=host,
+                )
+            else:
+                # We should keep listening on the master node's server url determining when to exit.
+                server.hang_until_terminated(
+                    f"http://{host}:{config.server_port}/health")