From e56b0017a3f580a6d35a879e3eafc2c1717caa49 Mon Sep 17 00:00:00 2001 From: jiangyunfan1 Date: Tue, 28 Oct 2025 23:33:15 +0800 Subject: [PATCH] [TEST]Add aisbench log and A2 cases (#3841) ### What this PR does / why we need it? This PR adds 2 more A2 cases which we need to test daily. It also enhances the logging for aisbench test failures to improve issue identification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the test - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.1 --------- Signed-off-by: jiangyunfan1 --- .../vllm_ascend_test_nightly_a2.yaml | 3 + .../models/test_deepseek_r1_0528_w8a8.py | 5 +- .../models/DeepSeep-R1-W8A8-A2-torchair.yaml | 64 +++++++++++++++++++ .../config/models/DeepSeep-R1-W8A8-A2.yaml | 4 +- .../e2e/nightly/multi_node/test_multi_node.py | 6 +- tools/aisbench.py | 22 ++++--- 6 files changed, 87 insertions(+), 17 deletions(-) create mode 100644 tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2-torchair.yaml diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index 8380346f..83fbb4d6 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -73,6 +73,9 @@ jobs: - name: multi-node-deepseek-dp config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml size: 2 + - name: multi-node-deepseek-dp-torchair + config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml + size: 2 uses: ./.github/workflows/_e2e_nightly_multi_node.yaml with: soc_version: a2 diff --git a/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py b/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py index 19c9d01c..38f54a04 100644 --- a/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py +++ b/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py @@ -133,4 
+133,7 @@ async def test_models(model: str, mode: str) -> None: if mode in ["single", "no_chunkprefill"]: return # aisbench test - run_aisbench_cases(model, port, aisbench_cases) + run_aisbench_cases(model, + port, + aisbench_cases, + server_args=server_args) diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2-torchair.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2-torchair.yaml new file mode 100644 index 00000000..42b70f76 --- /dev/null +++ b/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2-torchair.yaml @@ -0,0 +1,64 @@ +test_name: "test DeepSeek-R1-W8A8 torchair on A2" +model: "vllm-ascend/DeepSeek-R1-0528-W8A8" +num_nodes: 2 +npu_per_node: 8 +env_common: + VLLM_USE_MODELSCOPE: true + HCCL_BUFFSIZE: 1024 + SERVER_PORT: 8080 + OMP_PROC_BIND: false + OMP_NUM_THREADS: 10 + + +deployment: + - + server_cmd: > + vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8 + --host 0.0.0.0 + --port $SERVER_PORT + --data-parallel-size 4 + --data-parallel-size-local 2 + --data-parallel-address $LOCAL_IP + --data-parallel-rpc-port 13399 + --no-enable-prefix-caching + --max-num-seqs 16 + --tensor-parallel-size 4 + --max-model-len 36864 + --max-num-batched-tokens 6000 + --enable-expert-parallel + --trust-remote-code + --quantization ascend + --gpu-memory-utilization 0.9 + --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --additional-config '{"ascend_scheduler_config":{"enabled":false},"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}' + + - + server_cmd: > + vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8 + --headless + --data-parallel-size 4 + --data-parallel-rpc-port 13399 + --data-parallel-size-local 2 + --data-parallel-start-rank 2 + --data-parallel-address $MASTER_IP + --no-enable-prefix-caching + --max-num-seqs 16 + --tensor-parallel-size 4 + --max-model-len 36864 + --max-num-batched-tokens 6000 + 
--enable-expert-parallel + --trust-remote-code + --quantization ascend + --gpu-memory-utilization 0.9 + --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --additional-config '{"ascend_scheduler_config":{"enabled":false},"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}' +benchmarks: + acc: + case_type: accuracy + dataset_path: vllm-ascend/gsm8k + request_conf: vllm_api_general_chat + dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt + max_out_len: 32768 + batch_size: 512 + baseline: 95 + threshold: 5 diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml index 6f7774c4..cf44bc8f 100644 --- a/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml +++ b/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml @@ -30,7 +30,7 @@ deployment: --quantization ascend --gpu-memory-utilization 0.9 --enforce-eager - --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \ + --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' --additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}' - @@ -52,6 +52,6 @@ deployment: --quantization ascend --gpu-memory-utilization 0.9 --enforce-eager - --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \ + --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' --additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}' benchmarks: diff --git a/tests/e2e/nightly/multi_node/test_multi_node.py b/tests/e2e/nightly/multi_node/test_multi_node.py index 1f6af1da..19bdf64a 100644 --- a/tests/e2e/nightly/multi_node/test_multi_node.py +++ b/tests/e2e/nightly/multi_node/test_multi_node.py @@ 
-117,9 +117,7 @@ async def test_multi_node() -> None: if config.is_master: port = proxy_port if disaggregated_prefill else server_port # aisbench test - if acc_cmd: - run_aisbench_cases(local_model_path, port, acc_cmd) - if perf_cmd: - run_aisbench_cases(local_model_path, port, perf_cmd) + aisbench_cases = [acc_cmd, perf_cmd] + run_aisbench_cases(local_model_path, port, aisbench_cases) else: remote_server.hang_until_terminated() diff --git a/tools/aisbench.py b/tools/aisbench.py index 75b8fd92..e8c8159c 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -16,6 +16,7 @@ # import hashlib import json +import logging import os import re import subprocess @@ -188,8 +189,8 @@ class AisbenchRunner: line).group(1) return if "ERROR" in line: - raise RuntimeError( - "Some errors happen to Aisbench task.") from None + error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}" + raise RuntimeError(error_msg) from None def _wait_for_task(self): self._wait_for_exp_folder() @@ -201,8 +202,8 @@ class AisbenchRunner: self.result_line = line return if "ERROR" in line: - raise RuntimeError( - "Some errors happen to Aisbench task.") from None + error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}" + raise RuntimeError(error_msg) from None def _get_result_performance(self): result_dir = re.search(r'Performance Result files locate in (.*)', @@ -237,12 +238,12 @@ class AisbenchRunner: assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}." 
-def run_aisbench_cases(model, port, aisbench_cases): - if isinstance(aisbench_cases, dict): - aisbench_cases = [aisbench_cases] +def run_aisbench_cases(model, port, aisbench_cases, server_args=""): aisbench_results = [] aisbench_errors = [] for aisbench_case in aisbench_cases: + if not aisbench_case: + continue try: with AisbenchRunner(model, port, aisbench_case) as aisbench: aisbench_results.append(aisbench.result) @@ -251,9 +252,10 @@ def run_aisbench_cases(model, port, aisbench_cases): aisbench_errors.append([aisbench_case, e]) print(e) for failed_case, error_info in aisbench_errors: - print( - f"The following aisbench case failed: {failed_case}, reason is {error_info}." - ) + error_msg = f"The following aisbench case failed: {failed_case}, reason is {error_info}" + if server_args: + error_msg += f"\nserver_args are {server_args}" + logging.error(error_msg) assert not aisbench_errors, "some aisbench cases failed, info were shown above." return aisbench_results