[CI][Nightly] Support local debugging for multi-node CI test cases (#4489)

### What this PR does / why we need it?
This patch mainly does the following:
1. Make k8s/lws optional for multi-node testing, allowing developers to
run multi-node tests locally by explicitly passing in the IP addresses of
all nodes.
2. Allow passing a custom proxy script path in the config file to load
the proxy.

- vLLM version: v0.11.2

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-11-27 17:20:29 +08:00
committed by GitHub
parent 1fd56b1106
commit b220de33e8
4 changed files with 89 additions and 23 deletions

View File

@@ -7,8 +7,8 @@ from modelscope import snapshot_download # type: ignore
from requests.exceptions import ConnectionError, HTTPError, Timeout
from tests.e2e.conftest import RemoteOpenAIServer
from tests.e2e.nightly.multi_node.config.multi_node_config import (
DISAGGREGATED_PREFILL_PROXY_SCRIPT, MultiNodeConfig)
from tests.e2e.nightly.multi_node.config.multi_node_config import \
MultiNodeConfig
from tools.aisbench import run_aisbench_cases
prompts = [
@@ -100,8 +100,10 @@ async def test_multi_node() -> None:
disaggregated_prefill = config.disaggregated_prefill
server_port = config.server_port
proxy_port = config.proxy_port
server_host = config.cluster_ips[0]
with config.launch_server_proxy(DISAGGREGATED_PREFILL_PROXY_SCRIPT):
server_host = config.node_info.ip
proxy_script = config.envs.get("DISAGGREGATED_PREFILL_PROXY_SCRIPT", \
'examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py')
with config.launch_server_proxy(proxy_script):
with RemoteOpenAIServer(
model=local_model_path,
vllm_serve_args=config.server_cmd,