[CI] Cleanup accuracy tests (#4861)

### What this PR does / why we need it?
Delete the accuracy tests for the following models:
- Qwen2-VL-7B-Instruct
- Qwen2.5-VL-7B-Instruct
- gemma-2-9b-it
- DeepSeek-V2-Lite

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
SILONG ZENG
2025-12-10 14:13:56 +08:00
committed by GitHub
parent e32014ac1d
commit 7132ae8532
6 changed files with 0 additions and 53 deletions

View File

@@ -82,7 +82,6 @@ jobs:
- os: linux-aarch64-a2-1
model_list:
- Qwen3-8B
- Qwen2.5-VL-7B-Instruct
- Qwen2-Audio-7B-Instruct
- Qwen3-8B-W8A8
- Qwen3-VL-8B-Instruct
@@ -91,7 +90,6 @@ jobs:
- os: linux-aarch64-a2-1
model_list:
- ERNIE-4.5-21B-A3B-PT
- gemma-2-9b-it
- gemma-3-4b-it
- internlm-7b
- InternVL3_5-8B-hf
@@ -101,7 +99,6 @@ jobs:
model_list:
- Qwen3-30B-A3B
- Qwen3-VL-30B-A3B-Instruct
- DeepSeek-V2-Lite
- Qwen3-30B-A3B-W8A8
- os: linux-aarch64-a2-4
model_list:

View File

@@ -1,16 +0,0 @@
model_name: "deepseek-ai/DeepSeek-V2-Lite"
hardware: "Atlas A2 Series"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.385
- name: "exact_match,flexible-extract"
value: 0.385
tensor_parallel_size: 2
batch_size: 32
gpu_memory_utilization: 0.7
apply_chat_template: False
fewshot_as_multiturn: False
trust_remote_code: True
enforce_eager: False

View File

@@ -1,10 +0,0 @@
model_name: "Qwen/Qwen2-VL-7B-Instruct"
hardware: "Atlas A2 Series"
model: "vllm-vlm"
tasks:
- name: "mmmu_val"
metrics:
- name: "acc,none"
value: 0.50
max_model_len: 8192
gpu_memory_utilization: 0.7

View File

@@ -1,9 +0,0 @@
model_name: "Qwen/Qwen2.5-VL-7B-Instruct"
hardware: "Atlas A2 Series"
model: "vllm-vlm"
tasks:
- name: "mmmu_val"
metrics:
- name: "acc,none"
value: 0.51
max_model_len: 8192

View File

@@ -1,9 +1,6 @@
DeepSeek-V2-Lite.yaml
Qwen2.5-VL-7B-Instruct.yaml
Qwen3-30B-A3B.yaml
Qwen3-8B.yaml
Qwen2-7B.yaml
Qwen2-VL-7B-Instruct.yaml
Qwen2-Audio-7B-Instruct.yaml
Qwen3-VL-30B-A3B-Instruct.yaml
Qwen3-VL-8B-Instruct.yaml
@@ -11,7 +8,6 @@ Qwen2.5-Omni-7B.yaml
Meta-Llama-3.1-8B-Instruct.yaml
InternVL3_5-8B.yaml
ERNIE-4.5-21B-A3B-PT.yaml
gemma-2-9b-it.yaml
gemma-3-4b-it.yaml
internlm-7b.yaml
Molmo-7B-D-0924.yaml

View File

@@ -1,11 +0,0 @@
model_name: "LLM-Research/gemma-2-9b-it"
hardware: "Atlas A2 Series"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.46
- name: "exact_match,flexible-extract"
value: 0.79
num_fewshot: 5
gpu_memory_utilization: 0.8