[TEST]Add 2P1D multi node cases for nightly test (#3764)

### What this PR does / why we need it?
This PR adds the 2P1D multi-node func/acc/perf test cases; we need to test
them daily.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By running the added test cases.

- vLLM version: v0.11.0rc3
- vLLM main:
c9461e05a4

---------

Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
Signed-off-by: wangli <wangli858794774@gmail.com>
Co-authored-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
jiangyunfan1
2025-10-27 23:09:15 +08:00
committed by GitHub
parent d64bdd06ae
commit 9030106a14
9 changed files with 134 additions and 101 deletions

View File

@@ -144,20 +144,21 @@ deployment:
benchmarks:
perf:
case_type: performance
dataset_path: vllm-ascend/GSM8K-in3500-bs400
dataset_path: vllm-ascend/GSM8K-in3500-bs2800
request_conf: vllm_api_stream_chat
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
num_prompts: 1
max_out_len: 2
batch_size: 1
baseline: 5
num_prompts: 2800
max_out_len: 1500
batch_size: 700
request_rate: 11.2
baseline: 1
threshold: 0.97
acc:
case_type: accuracy
dataset_path: vllm-ascend/AIME2024
dataset_path: vllm-ascend/gsm8k
request_conf: vllm_api_general_chat
dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt
max_out_len: 10
batch_size: 32
baseline: 1
threshold: 1
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
max_out_len: 32768
batch_size: 512
baseline: 95
threshold: 5

View File

@@ -97,22 +97,3 @@ deployment:
}
}'
benchmarks:
perf:
case_type: performance
dataset_path: vllm-ascend/GSM8K-in3500-bs400
request_conf: vllm_api_stream_chat
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
num_prompts: 1
max_out_len: 2
batch_size: 1
baseline: 5
threshold: 0.97
acc:
case_type: accuracy
dataset_path: vllm-ascend/AIME2024
request_conf: vllm_api_general_chat
dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt
max_out_len: 10
batch_size: 32
baseline: 1
threshold: 1

View File

@@ -47,22 +47,3 @@ deployment:
--no-enable-prefix-caching
--gpu-memory-utilization 0.9
benchmarks:
perf:
case_type: performance
dataset_path: vllm-ascend/GSM8K-in3500-bs400
request_conf: vllm_api_stream_chat
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
num_prompts: 1
max_out_len: 2
batch_size: 1
baseline: 5
threshold: 0.97
acc:
case_type: accuracy
dataset_path: vllm-ascend/AIME2024
request_conf: vllm_api_general_chat
dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt
max_out_len: 10
batch_size: 32
baseline: 1
threshold: 1

View File

@@ -47,22 +47,3 @@ deployment:
--no-enable-prefix-caching
--gpu-memory-utilization 0.9
benchmarks:
perf:
case_type: performance
dataset_path: vllm-ascend/GSM8K-in3500-bs400
request_conf: vllm_api_stream_chat
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
num_prompts: 1
max_out_len: 2
batch_size: 1
baseline: 5
threshold: 0.97
acc:
case_type: accuracy
dataset_path: vllm-ascend/AIME2024
request_conf: vllm_api_general_chat
dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt
max_out_len: 10
batch_size: 32
baseline: 1
threshold: 1

View File

@@ -84,22 +84,3 @@ deployment:
}
}'
benchmarks:
perf:
case_type: performance
dataset_path: vllm-ascend/GSM8K-in3500-bs400
request_conf: vllm_api_stream_chat
dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
num_prompts: 1
max_out_len: 2
batch_size: 1
baseline: 5
threshold: 0.97
acc:
case_type: accuracy
dataset_path: vllm-ascend/AIME2024
request_conf: vllm_api_general_chat
dataset_conf: aime2024/aime2024_gen_0_shot_chat_prompt
max_out_len: 10
batch_size: 32
baseline: 1
threshold: 1

View File

@@ -50,8 +50,6 @@ class MultiNodeConfig:
self.proxy_port = get_avaliable_port()
self.perf_cmd = perf_cmd
self.acc_cmd = acc_cmd
assert perf_cmd is not None, "perf_cmd must be provided"
assert acc_cmd is not None, "acc_cmd must be provided"
self.cur_index = int(os.getenv("LWS_WORKER_INDEX", 0))
self.cur_ip = get_cur_ip()
@@ -220,10 +218,10 @@ class MultiNodeConfig:
server_port=server_port,
server_cmd=server_cmd))
benchmarks = config_data.get("benchmarks", {})
benchmarks = config_data.get("benchmarks") or {}
assert benchmarks is not None, "benchmarks must be provided"
perf_cmd = benchmarks["perf"]
acc_cmd = benchmarks["acc"]
perf_cmd = benchmarks.get("perf")
acc_cmd = benchmarks.get("acc")
return cls(model=model,
test_name=test_name,
@@ -290,3 +288,8 @@ class MultiNodeConfig:
subprocess.run(cmd, env=env, check=True)
assert os.path.exists(
str(ranktable_path)), "failed generate ranktable.json"
if __name__ == '__main__':
config = MultiNodeConfig.from_yaml()
print(config.perf_cmd)