From 9030106a14a6108ff3877a2e05d4c33d8f442741 Mon Sep 17 00:00:00 2001 From: jiangyunfan1 Date: Mon, 27 Oct 2025 23:09:15 +0800 Subject: [PATCH] [TEST]Add 2P1D multi node cases for nightly test (#3764) ### What this PR does / why we need it? This PR adds the 2P1D multi node func/acc/perf test cases, we need test them daily ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? by running the test - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/c9461e05a4ed3557cfbf4b15ded1e26761cc39ca --------- Signed-off-by: jiangyunfan1 Signed-off-by: wangli Co-authored-by: wangli --- .../config/models/DeepSeek-R1-W8A8.yaml | 23 ++-- .../multi_node/config/models/DeepSeek-V3.yaml | 19 --- .../multi_node/config/models/GLM-4_5.yaml | 19 --- .../config/models/Qwen3-235B-A3B.yaml | 19 --- .../config/models/Qwen3-235B-W8A8.yaml | 19 --- .../multi_node/config/multi_node_config.py | 13 ++- tests/e2e/nightly/multi_node/scripts/run.sh | 12 ++ .../e2e/nightly/multi_node/test_multi_node.py | 109 ++++++++++++++++-- tools/aisbench.py | 2 + 9 files changed, 134 insertions(+), 101 deletions(-) diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml index e3b1db18..a8e49290 100644 --- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml +++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml @@ -144,20 +144,21 @@ deployment: benchmarks: perf: case_type: performance - dataset_path: vllm-ascend/GSM8K-in3500-bs400 + dataset_path: vllm-ascend/GSM8K-in3500-bs2800 request_conf: vllm_api_stream_chat dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf - num_prompts: 1 - max_out_len: 2 - batch_size: 1 - baseline: 5 + num_prompts: 2800 + max_out_len: 1500 + batch_size: 700 + request_rate: 11.2 + baseline: 1 threshold: 0.97 acc: case_type: accuracy - dataset_path: vllm-ascend/AIME2024 + dataset_path: vllm-ascend/gsm8k request_conf: vllm_api_general_chat - dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt - max_out_len: 10 - batch_size: 32 - baseline: 1 - threshold: 1 + dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt + max_out_len: 32768 + batch_size: 512 + baseline: 95 + threshold: 5 diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml index 94ecb61d..c5b34c9d 100644 --- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml @@ -97,22 +97,3 @@ deployment: } }' benchmarks: - perf: - case_type: performance - dataset_path: vllm-ascend/GSM8K-in3500-bs400 - request_conf: vllm_api_stream_chat - dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf - num_prompts: 1 - max_out_len: 2 - batch_size: 1 - baseline: 5 - threshold: 0.97 - acc: - case_type: accuracy - dataset_path: vllm-ascend/AIME2024 - request_conf: vllm_api_general_chat - dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt - max_out_len: 10 - batch_size: 32 - baseline: 1 - threshold: 1 diff --git a/tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml b/tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml index 70c7cc7e..d40915f2 100644 --- a/tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml +++ b/tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml @@ -47,22 +47,3 @@ deployment: --no-enable-prefix-caching --gpu-memory-utilization 0.9 benchmarks: - perf: - case_type: performance - dataset_path: vllm-ascend/GSM8K-in3500-bs400 - request_conf: vllm_api_stream_chat - dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf - num_prompts: 1 - max_out_len: 2 - batch_size: 1 - baseline: 5 - threshold: 0.97 - acc: - case_type: accuracy - dataset_path: vllm-ascend/AIME2024 - request_conf: vllm_api_general_chat - dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt - max_out_len: 10 - batch_size: 32 - baseline: 1 - threshold: 1 diff --git a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml index f0ac5e88..b72bb542 100644 --- a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +++ b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml @@ -47,22 +47,3 @@ deployment: --no-enable-prefix-caching --gpu-memory-utilization 0.9 benchmarks: - perf: - case_type: performance - dataset_path: vllm-ascend/GSM8K-in3500-bs400 - request_conf: vllm_api_stream_chat - dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf - num_prompts: 1 - max_out_len: 2 - batch_size: 1 - baseline: 5 - threshold: 0.97 - acc: - case_type: accuracy - dataset_path: vllm-ascend/AIME2024 - request_conf: vllm_api_general_chat - dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt - max_out_len: 10 - batch_size: 32 - baseline: 1 - threshold: 1 diff --git a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml index ca7033a3..d4e4ce39 100644 --- a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml +++ b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml @@ -84,22 +84,3 @@ deployment: } }' benchmarks: - perf: - case_type: performance - dataset_path: vllm-ascend/GSM8K-in3500-bs400 - request_conf: vllm_api_stream_chat - dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf - num_prompts: 1 - max_out_len: 2 - batch_size: 1 - baseline: 5 - threshold: 0.97 - acc: - case_type: accuracy - dataset_path: vllm-ascend/AIME2024 - request_conf: vllm_api_general_chat - dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt - max_out_len: 10 - batch_size: 32 - baseline: 1 - threshold: 1 diff --git a/tests/e2e/nightly/multi_node/config/multi_node_config.py b/tests/e2e/nightly/multi_node/config/multi_node_config.py index 18ee6cc3..4ca8d773 100644 --- a/tests/e2e/nightly/multi_node/config/multi_node_config.py +++ b/tests/e2e/nightly/multi_node/config/multi_node_config.py @@ -50,8 +50,6 @@ class MultiNodeConfig: self.proxy_port = get_avaliable_port() self.perf_cmd = perf_cmd self.acc_cmd = acc_cmd - assert perf_cmd is not None, "perf_cmd must be provided" - assert acc_cmd is not None, "acc_cmd must be provided" self.cur_index = int(os.getenv("LWS_WORKER_INDEX", 0)) self.cur_ip = get_cur_ip() @@ -220,10 +218,10 @@ class MultiNodeConfig: server_port=server_port, server_cmd=server_cmd)) - benchmarks = config_data.get("benchmarks", {}) + benchmarks = config_data.get("benchmarks") or {} assert benchmarks is not None, "benchmarks must be provided" - perf_cmd = benchmarks["perf"] - acc_cmd = benchmarks["acc"] + perf_cmd = benchmarks.get("perf") + acc_cmd = benchmarks.get("acc") return cls(model=model, test_name=test_name, @@ -290,3 +288,8 @@ class MultiNodeConfig: subprocess.run(cmd, env=env, check=True) assert os.path.exists( str(ranktable_path)), "failed generate ranktable.json" + + +if __name__ == '__main__': + config = MultiNodeConfig.from_yaml() + print(config.perf_cmd) diff --git a/tests/e2e/nightly/multi_node/scripts/run.sh b/tests/e2e/nightly/multi_node/scripts/run.sh index 544bb034..f596dfe6 100644 --- a/tests/e2e/nightly/multi_node/scripts/run.sh +++ b/tests/e2e/nightly/multi_node/scripts/run.sh @@ -120,6 +120,17 @@ download_go() { print_success "Go $GOVER installed successfully" } +install_ais_bench() { + local AIS_BENCH="$SRC_DIR/benchmark" + git clone https://gitee.com/aisbench/benchmark.git $AIS_BENCH + cd $AIS_BENCH + git checkout v3.0-20250930-master + pip3 install -e ./ + pip3 install -r requirements/api.txt + pip3 install -r requirements/extra.txt + cd - +} + install_go() { # Check if Go is already installed if command -v go &> /dev/null; then @@ -167,6 +178,7 @@ main() { checkout_src install_sys_dependencies install_vllm + install_ais_bench # to speed up mooncake build process, install Go here install_go cd "$WORKSPACE/source_code" diff --git a/tests/e2e/nightly/multi_node/test_multi_node.py b/tests/e2e/nightly/multi_node/test_multi_node.py index 3808dc3f..9b3ac26c 100644 --- a/tests/e2e/nightly/multi_node/test_multi_node.py +++ b/tests/e2e/nightly/multi_node/test_multi_node.py @@ -1,13 +1,98 @@ +import time +from typing import Any, List, Optional, Union + +import httpx +import pytest +from modelscope import snapshot_download # type: ignore +from requests.exceptions import ConnectionError, HTTPError, Timeout + from tests.e2e.conftest import RemoteOpenAIServer from tests.e2e.nightly.multi_node.config.multi_node_config import ( DISAGGREGATED_PREFILL_PROXY_SCRIPT, MultiNodeConfig) +from tools.aisbench import run_aisbench_cases + +prompts = [ + "San Francisco is a", +] + +api_keyword_args = { + "max_tokens": 10, +} -def test_multi_node() -> None: +def get_local_model_path_with_retry( + model: str, + revision: str = "master", + max_retries: int = 5, + delay: int = 5, +) -> Optional[str]: + for attempt in range(1, max_retries + 1): + try: + local_model_path = snapshot_download( + model_id=model, + revision=revision, + ) + return local_model_path + + except HTTPError: + continue + + except (ConnectionError, Timeout): + continue + + if attempt < max_retries: + time.sleep(delay) + return None + + +async def get_completions(url: str, model: str, prompts: Union[str, List[str]], + **api_kwargs: Any) -> List[str]: + """ + Asynchronously send HTTP requests to a /v1/completions endpoint. + + Args: + url: Full endpoint URL, e.g. "http://localhost:1025/v1/completions" + model: Model name or local model path + prompts: A single prompt string or a list of prompts + **api_kwargs: Additional parameters (e.g., max_tokens, temperature) + + Returns: + List[str]: A list of generated texts corresponding to each prompt + """ + headers = {"Content-Type": "application/json"} + + if isinstance(prompts, str): + prompts = [prompts] + + results = [] + async with httpx.AsyncClient(timeout=600.0) as client: + for prompt in prompts: + payload = {"model": model, "prompt": prompt, **api_kwargs} + + response = await client.post(url, headers=headers, json=payload) + if response.status_code != 200: + raise RuntimeError( + f"Request failed with status {response.status_code}: {response.text}" + ) + + resp_json = response.json() + choices = resp_json.get("choices", []) + if not choices or not choices[0].get("text"): + raise ValueError("Empty response from server") + + results.append(choices[0]["text"]) + + return results + + +@pytest.mark.asyncio +async def test_multi_node() -> None: config = MultiNodeConfig.from_yaml() + local_model_path = get_local_model_path_with_retry(config.model) + assert local_model_path is not None, "can not find any local weight for test" env_dict = config.envs - # perf_cmd = config.perf_cmd - # acc_cmd = config.acc_cmd + perf_cmd = config.perf_cmd + acc_cmd = config.acc_cmd nodes_info = config.nodes_info disaggregated_prefill = config.disaggregated_prefill server_port = config.server_port @@ -15,7 +100,7 @@ def test_multi_node() -> None: server_host = config.cluster_ips[0] with config.launch_server_proxy(DISAGGREGATED_PREFILL_PROXY_SCRIPT): with RemoteOpenAIServer( - model=config.model, + model=local_model_path, vllm_serve_args=config.server_cmd, server_port=server_port, server_host=server_host, @@ -26,11 +111,17 @@ def test_multi_node() -> None: nodes_info=nodes_info, max_wait_seconds=2000, ) as remote_server: - # base_url = remote_server.url_root if config.is_master: - pass - # TODO: enable perf and acc test - # subprocess.run(perf_cmd, check=True) - # subprocess.run(acc_cmd, check=True) + port = proxy_port if disaggregated_prefill else server_port + base_url = f"http://localhost:{port}/v1/completions" + _ = await get_completions(url=base_url, + model=local_model_path, + prompts=prompts, + api_kwargs=api_keyword_args) + # aisbench test + if acc_cmd: + run_aisbench_cases(local_model_path, port, acc_cmd) + if perf_cmd: + run_aisbench_cases(local_model_path, port, perf_cmd) else: remote_server.hang_until_terminated() diff --git a/tools/aisbench.py b/tools/aisbench.py index 3f319977..f81e4069 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -231,6 +231,8 @@ class AisbenchRunner: def run_aisbench_cases(model, port, aisbench_cases): + if isinstance(aisbench_cases, dict): + aisbench_cases = [aisbench_cases] aisbench_results = [] aisbench_errors = [] for aisbench_case in aisbench_cases: