From 1c677c3b8751345b969e2d1f6ea0cf4180ba9938 Mon Sep 17 00:00:00 2001
From: Canlin Guo
Date: Wed, 12 Nov 2025 09:05:55 +0800
Subject: [PATCH] [Test][Accuracy] Add accuracy evaluation config for InternVL3_5-8B (#3964)

### What this PR does / why we need it?
To continuously monitor the accuracy of the InternVL3_5-8B model, this PR adds the corresponding configuration file to the CI. We need to add the `-hf` suffix to avoid incompatibility with the `lm-eval` preprocessor.

### How was this patch tested?
`pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py --config ./tests/e2e/models/configs/InternVL3_5-8B.yaml`

- vLLM version: v0.11.0
- vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac

Signed-off-by: gcanlin
---
 tests/e2e/models/configs/InternVL3_5-8B.yaml | 11 +++++++++++
 tests/e2e/models/configs/accuracy.txt        |  1 +
 2 files changed, 12 insertions(+)
 create mode 100644 tests/e2e/models/configs/InternVL3_5-8B.yaml

diff --git a/tests/e2e/models/configs/InternVL3_5-8B.yaml b/tests/e2e/models/configs/InternVL3_5-8B.yaml
new file mode 100644
index 00000000..3628785a
--- /dev/null
+++ b/tests/e2e/models/configs/InternVL3_5-8B.yaml
@@ -0,0 +1,11 @@
+model_name: "OpenGVLab/InternVL3_5-8B-hf"
+runner: "linux-aarch64-a2-1"
+hardware: "Atlas A2 Series"
+model: "vllm-vlm"
+tasks:
+  - name: "mmmu_val"
+    metrics:
+      - name: "acc,none"
+        value: 0.58
+max_model_len: 40960
+trust_remote_code: True
diff --git a/tests/e2e/models/configs/accuracy.txt b/tests/e2e/models/configs/accuracy.txt
index 5a839071..daa23e97 100644
--- a/tests/e2e/models/configs/accuracy.txt
+++ b/tests/e2e/models/configs/accuracy.txt
@@ -9,3 +9,4 @@ Qwen3-VL-30B-A3B-Instruct.yaml
 Qwen3-VL-8B-Instruct.yaml
 Qwen2.5-Omni-7B.yaml
 Meta-Llama-3.1-8B-Instruct.yaml
+InternVL3_5-8B.yaml
\ No newline at end of file