From 7132ae853297047401853730be267cb4e9f40293 Mon Sep 17 00:00:00 2001 From: SILONG ZENG <2609716663@qq.com> Date: Wed, 10 Dec 2025 14:13:56 +0800 Subject: [PATCH] [CI]Cleanup accuracy test (#4861) ### What this PR does / why we need it? Delete accuracy testing of some models: - Qwen2-VL-7B-Instruct - Qwen2.5-VL-7B-Instruct - gemma-2-9b-it - DeepSeek-V2-Lite - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: MrZ20 <2609716663@qq.com> --- .../workflows/vllm_ascend_test_nightly_a2.yaml | 3 --- tests/e2e/models/configs/DeepSeek-V2-Lite.yaml | 16 ---------------- .../e2e/models/configs/Qwen2-VL-7B-Instruct.yaml | 10 ---------- .../models/configs/Qwen2.5-VL-7B-Instruct.yaml | 9 --------- tests/e2e/models/configs/accuracy.txt | 4 ---- tests/e2e/models/configs/gemma-2-9b-it.yaml | 11 ----------- 6 files changed, 53 deletions(-) delete mode 100644 tests/e2e/models/configs/DeepSeek-V2-Lite.yaml delete mode 100644 tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml delete mode 100644 tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml delete mode 100644 tests/e2e/models/configs/gemma-2-9b-it.yaml diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index a18cd5d4..d13e79f1 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -82,7 +82,6 @@ jobs: - os: linux-aarch64-a2-1 model_list: - Qwen3-8B - - Qwen2.5-VL-7B-Instruct - Qwen2-Audio-7B-Instruct - Qwen3-8B-W8A8 - Qwen3-VL-8B-Instruct @@ -91,7 +90,6 @@ jobs: - os: linux-aarch64-a2-1 model_list: - ERNIE-4.5-21B-A3B-PT - - gemma-2-9b-it - gemma-3-4b-it - internlm-7b - InternVL3_5-8B-hf @@ -101,7 +99,6 @@ jobs: model_list: - Qwen3-30B-A3B - Qwen3-VL-30B-A3B-Instruct - - DeepSeek-V2-Lite - Qwen3-30B-A3B-W8A8 - os: linux-aarch64-a2-4 model_list: diff --git a/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml 
b/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml deleted file mode 100644 index c23be35b..00000000 --- a/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +++ /dev/null @@ -1,16 +0,0 @@ -model_name: "deepseek-ai/DeepSeek-V2-Lite" -hardware: "Atlas A2 Series" -tasks: -- name: "gsm8k" - metrics: - - name: "exact_match,strict-match" - value: 0.385 - - name: "exact_match,flexible-extract" - value: 0.385 -tensor_parallel_size: 2 -batch_size: 32 -gpu_memory_utilization: 0.7 -apply_chat_template: False -fewshot_as_multiturn: False -trust_remote_code: True -enforce_eager: False diff --git a/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml b/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml deleted file mode 100644 index 576b4de6..00000000 --- a/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +++ /dev/null @@ -1,10 +0,0 @@ -model_name: "Qwen/Qwen2-VL-7B-Instruct" -hardware: "Atlas A2 Series" -model: "vllm-vlm" -tasks: -- name: "mmmu_val" - metrics: - - name: "acc,none" - value: 0.50 -max_model_len: 8192 -gpu_memory_utilization: 0.7 diff --git a/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml b/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml deleted file mode 100644 index 85489899..00000000 --- a/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model_name: "Qwen/Qwen2.5-VL-7B-Instruct" -hardware: "Atlas A2 Series" -model: "vllm-vlm" -tasks: -- name: "mmmu_val" - metrics: - - name: "acc,none" - value: 0.51 -max_model_len: 8192 diff --git a/tests/e2e/models/configs/accuracy.txt b/tests/e2e/models/configs/accuracy.txt index c15d7986..b4ab5419 100644 --- a/tests/e2e/models/configs/accuracy.txt +++ b/tests/e2e/models/configs/accuracy.txt @@ -1,9 +1,6 @@ -DeepSeek-V2-Lite.yaml -Qwen2.5-VL-7B-Instruct.yaml Qwen3-30B-A3B.yaml Qwen3-8B.yaml Qwen2-7B.yaml -Qwen2-VL-7B-Instruct.yaml Qwen2-Audio-7B-Instruct.yaml Qwen3-VL-30B-A3B-Instruct.yaml Qwen3-VL-8B-Instruct.yaml @@ -11,7 +8,6 @@ Qwen2.5-Omni-7B.yaml Meta-Llama-3.1-8B-Instruct.yaml 
InternVL3_5-8B.yaml ERNIE-4.5-21B-A3B-PT.yaml -gemma-2-9b-it.yaml gemma-3-4b-it.yaml internlm-7b.yaml Molmo-7B-D-0924.yaml diff --git a/tests/e2e/models/configs/gemma-2-9b-it.yaml b/tests/e2e/models/configs/gemma-2-9b-it.yaml deleted file mode 100644 index 050e2f03..00000000 --- a/tests/e2e/models/configs/gemma-2-9b-it.yaml +++ /dev/null @@ -1,11 +0,0 @@ -model_name: "LLM-Research/gemma-2-9b-it" -hardware: "Atlas A2 Series" -tasks: -- name: "gsm8k" - metrics: - - name: "exact_match,strict-match" - value: 0.46 - - name: "exact_match,flexible-extract" - value: 0.79 -num_fewshot: 5 -gpu_memory_utilization: 0.8