From 863a5a5a17c456eca448d7726f4cb081b906cb8e Mon Sep 17 00:00:00 2001 From: Nengjun Ma Date: Tue, 9 Dec 2025 23:05:41 +0800 Subject: [PATCH] Add gsm8k accuracy test for multi-node Qwen3-235B-A22B (#4802) ### What this PR does / why we need it? As there is no accuracy test for the Qwen3-235B-A22B model Test result: dataset version metric mode vllm-api-general-chat --------- --------- -------- ------ ----------------------- gsm8k 7cd45e accuracy gen 96.29 Time taken for test case run: 30 minutes - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: leo-pony Co-authored-by: Mengqing Cao --- .../multi_node/config/models/Qwen3-235B-A3B.yaml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml index dee18570..dd3c7db7 100644 --- a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +++ b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml @@ -23,7 +23,7 @@ deployment: --tensor-parallel-size 8 --seed 1024 --enable-expert-parallel - --max-num-seqs 16 + --max-num-seqs 32 --max-model-len 8192 --max-num-batched-tokens 8192 --trust-remote-code @@ -40,7 +40,7 @@ deployment: --data-parallel-rpc-port 13389 --tensor-parallel-size 8 --seed 1024 - --max-num-seqs 16 + --max-num-seqs 32 --max-model-len 8192 --max-num-batched-tokens 8192 --enable-expert-parallel @@ -48,4 +48,12 @@ deployment: --no-enable-prefix-caching --gpu-memory-utilization 0.9 benchmarks: - + acc: case_type: accuracy dataset_path: vllm-ascend/gsm8k request_conf: vllm_api_general_chat dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt max_out_len: 7680 batch_size: 512 baseline: 95 threshold: 3