From 0c04bf1e3692693da07455177d1c70932c814358 Mon Sep 17 00:00:00 2001 From: zhangxinyuehfad <59153331+zhangxinyuehfad@users.noreply.github.com> Date: Thu, 18 Sep 2025 23:58:23 +0800 Subject: [PATCH] [Fixbug] Fix accuracy for DeepSeek-V2-Lite (#3016) ### What this PR does / why we need it? Fix the accuracy test for DeepSeek-V2-Lite: add `batch_size: 8` and `gpu_memory_utilization: 0.7` to the model's e2e config, and make `test_lm_eval_correctness.py` take `batch_size` from the eval config (falling back to "auto" when it is not set) instead of hard-coding "auto", both in the evaluation parameters and in the generated report. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? CI passed - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/66072b36dbf1707440ff43d57273d9e9974349d7 Signed-off-by: hfadzxy --- tests/e2e/models/configs/DeepSeek-V2-Lite.yaml | 2 ++ tests/e2e/models/test_lm_eval_correctness.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml b/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml index 7df0544..571a2e4 100644 --- a/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +++ b/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml @@ -7,6 +7,8 @@ tasks: - name: "exact_match,flexible-extract" value: 0.375 tensor_parallel_size: 2 +batch_size: 8 +gpu_memory_utilization: 0.7 apply_chat_template: False fewshot_as_multiturn: False trust_remote_code: True diff --git a/tests/e2e/models/test_lm_eval_correctness.py b/tests/e2e/models/test_lm_eval_correctness.py index 18768e1..7d023b1 100644 --- a/tests/e2e/models/test_lm_eval_correctness.py +++ b/tests/e2e/models/test_lm_eval_correctness.py @@ -84,7 +84,7 @@ def generate_report(tp_size, eval_config, report_data, report_dir, env_config): apply_chat_template=eval_config.get("apply_chat_template", True), fewshot_as_multiturn=eval_config.get("fewshot_as_multiturn", True), limit=eval_config.get("limit", "N/A"), - batch_size="auto", + batch_size=eval_config.get("batch_size", "auto"), num_fewshot=eval_config.get("num_fewshot", "N/A"), rows=report_data["rows"], parallel_mode=parallel_mode) @@ -110,7 +110,7 @@ def test_lm_eval_correctness_param(config_filename, tp_size, report_dir, "apply_chat_template": 
eval_config.get("apply_chat_template", True), "fewshot_as_multiturn": eval_config.get("fewshot_as_multiturn", True), "limit": eval_config.get("limit", None), - "batch_size": "auto", + "batch_size": eval_config.get("batch_size", "auto"), } for s in ["num_fewshot", "fewshot_as_multiturn", "apply_chat_template"]: val = eval_config.get(s, None)