[CI] Clean up accuracy test (#4861)

### What this PR does / why we need it?

Delete accuracy testing of some models:

- Qwen2-VL-7B-Instruct
- Qwen2.5-VL-7B-Instruct
- gemma-2-9b-it
- DeepSeek-V2-Lite

Tested against:

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

Signed-off-by: MrZ20 <2609716663@qq.com>
2025-12-10 14:13:56 +08:00
parent e32014ac1d
commit 7132ae8532
6 changed files with 0 additions and 53 deletions
--- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml
+++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml
@@ -82,7 +82,6 @@ jobs:
          - os: linux-aarch64-a2-1
            model_list:
              - Qwen3-8B
              - Qwen2.5-VL-7B-Instruct
              - Qwen2-Audio-7B-Instruct
              - Qwen3-8B-W8A8
              - Qwen3-VL-8B-Instruct
@@ -91,7 +90,6 @@ jobs:
          - os: linux-aarch64-a2-1
            model_list:
              - ERNIE-4.5-21B-A3B-PT
              - gemma-2-9b-it
              - gemma-3-4b-it
              - internlm-7b
              - InternVL3_5-8B-hf
@@ -101,7 +99,6 @@ jobs:
            model_list:
              - Qwen3-30B-A3B
              - Qwen3-VL-30B-A3B-Instruct
              - DeepSeek-V2-Lite
              - Qwen3-30B-A3B-W8A8
          - os: linux-aarch64-a2-4
            model_list:
--- a/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml
+++ b/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml
@@ -1,16 +0,0 @@
 model_name: "deepseek-ai/DeepSeek-V2-Lite"
 hardware: "Atlas A2 Series"
 tasks:
 - name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.385
  - name: "exact_match,flexible-extract"
    value: 0.385
 tensor_parallel_size: 2
 batch_size: 32
 gpu_memory_utilization: 0.7
 apply_chat_template: False
 fewshot_as_multiturn: False
 trust_remote_code: True
 enforce_eager: False
--- a/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml
+++ b/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml
@@ -1,10 +0,0 @@
 model_name: "Qwen/Qwen2-VL-7B-Instruct"
 hardware: "Atlas A2 Series"
 model: "vllm-vlm"
 tasks:
 - name: "mmmu_val"
  metrics:
  - name: "acc,none"
    value: 0.50
 max_model_len: 8192
 gpu_memory_utilization: 0.7
--- a/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml
+++ b/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml
@@ -1,9 +0,0 @@
 model_name: "Qwen/Qwen2.5-VL-7B-Instruct"
 hardware: "Atlas A2 Series"
 model: "vllm-vlm"
 tasks:
 - name: "mmmu_val"
  metrics:
  - name: "acc,none"
    value: 0.51
 max_model_len: 8192
--- a/tests/e2e/models/configs/accuracy.txt
+++ b/tests/e2e/models/configs/accuracy.txt
@@ -1,9 +1,6 @@
 DeepSeek-V2-Lite.yaml
 Qwen2.5-VL-7B-Instruct.yaml
 Qwen3-30B-A3B.yaml
 Qwen3-8B.yaml
 Qwen2-7B.yaml
 Qwen2-VL-7B-Instruct.yaml
 Qwen2-Audio-7B-Instruct.yaml
 Qwen3-VL-30B-A3B-Instruct.yaml
 Qwen3-VL-8B-Instruct.yaml
@@ -11,7 +8,6 @@ Qwen2.5-Omni-7B.yaml
 Meta-Llama-3.1-8B-Instruct.yaml
 InternVL3_5-8B.yaml
 ERNIE-4.5-21B-A3B-PT.yaml
 gemma-2-9b-it.yaml
 gemma-3-4b-it.yaml
 internlm-7b.yaml
 Molmo-7B-D-0924.yaml
--- a/tests/e2e/models/configs/gemma-2-9b-it.yaml
+++ b/tests/e2e/models/configs/gemma-2-9b-it.yaml
@@ -1,11 +0,0 @@
 model_name: "LLM-Research/gemma-2-9b-it"
 hardware: "Atlas A2 Series"
 tasks:
 - name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.46
  - name: "exact_match,flexible-extract"
    value: 0.79
 num_fewshot: 5
 gpu_memory_utilization: 0.8