diff --git a/.github/workflows/_e2e_nightly_single_node_models.yaml b/.github/workflows/_e2e_nightly_single_node_models.yaml index 3b804cda..c587722b 100644 --- a/.github/workflows/_e2e_nightly_single_node_models.yaml +++ b/.github/workflows/_e2e_nightly_single_node_models.yaml @@ -59,7 +59,7 @@ jobs: name: ${{inputs.model_list}} accuracy test runs-on: ${{ inputs.runner }} container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + image: "${{ inputs.image }}" env: VLLM_USE_MODELSCOPE: True GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }} @@ -111,6 +111,12 @@ jobs: . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl" + - name: Install tensorflow (for Molmo-7B-D-0924) + if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }} + shell: bash -l {0} + run: | + pip install tensorflow --no-cache-dir + - name: Resolve vllm-ascend version run: | VERSION_INPUT="${{ inputs.vllm-ascend }}" @@ -172,6 +178,7 @@ jobs: id: report env: VLLM_WORKER_MULTIPROC_METHOD: spawn + HF_DATASETS_OFFLINE: True VLLM_USE_MODELSCOPE: True VLLM_CI_RUNNER: ${{ inputs.runner }} VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }} diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index aaa0e1af..54e33b48 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -114,6 +114,15 @@ jobs: - Qwen3-VL-8B-Instruct - Qwen2.5-Omni-7B - Meta-Llama-3.1-8B-Instruct + - os: linux-aarch64-a2-1 + model_list: + - ERNIE-4.5-21B-A3B-PT + - gemma-2-9b-it + - gemma-3-4b-it + - internlm-7b + - InternVL3_5-8B-hf + - llava-1.5-7b-hf + - Molmo-7B-D-0924 - os: linux-aarch64-a2-2 model_list: - Qwen3-30B-A3B @@ -128,5 +137,5 @@ 
jobs: vllm: v0.11.2 runner: ${{ matrix.test_config.os }} model_list: ${{ toJson(matrix.test_config.model_list) }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 + image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11' upload: false diff --git a/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml b/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml new file mode 100644 index 00000000..ae39aab9 --- /dev/null +++ b/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml @@ -0,0 +1,9 @@ +model_name: "PaddlePaddle/ERNIE-4.5-21B-A3B-PT" +hardware: "Atlas A2 Series" +tasks: +- name: "gsm8k" + metrics: + - name: "exact_match,flexible-extract" + value: 0.71 +num_fewshot: 5 +trust_remote_code: True diff --git a/tests/e2e/models/configs/InternVL3_5-8B.yaml b/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml similarity index 100% rename from tests/e2e/models/configs/InternVL3_5-8B.yaml rename to tests/e2e/models/configs/InternVL3_5-8B-hf.yaml diff --git a/tests/e2e/models/configs/Molmo-7B-D-0924.yaml b/tests/e2e/models/configs/Molmo-7B-D-0924.yaml new file mode 100644 index 00000000..68951a40 --- /dev/null +++ b/tests/e2e/models/configs/Molmo-7B-D-0924.yaml @@ -0,0 +1,13 @@ +model_name: "LLM-Research/Molmo-7B-D-0924" +hardware: "Atlas A2 Series" +model: "vllm-vlm" +tasks: +- name: "ceval-valid" + metrics: + - name: "acc,none" + value: 0.71 +max_model_len: 4096 +trust_remote_code: True +apply_chat_template: False +fewshot_as_multiturn: False +gpu_memory_utilization: 0.8 diff --git a/tests/e2e/models/configs/accuracy.txt b/tests/e2e/models/configs/accuracy.txt index daa23e97..c15d7986 100644 --- a/tests/e2e/models/configs/accuracy.txt +++ b/tests/e2e/models/configs/accuracy.txt @@ -9,4 +9,10 @@ Qwen3-VL-30B-A3B-Instruct.yaml Qwen3-VL-8B-Instruct.yaml Qwen2.5-Omni-7B.yaml Meta-Llama-3.1-8B-Instruct.yaml -InternVL3_5-8B.yaml \ No newline at end of file +InternVL3_5-8B-hf.yaml 
+ERNIE-4.5-21B-A3B-PT.yaml +gemma-2-9b-it.yaml +gemma-3-4b-it.yaml +internlm-7b.yaml +Molmo-7B-D-0924.yaml +llava-1.5-7b-hf.yaml diff --git a/tests/e2e/models/configs/gemma-2-9b-it.yaml b/tests/e2e/models/configs/gemma-2-9b-it.yaml new file mode 100644 index 00000000..050e2f03 --- /dev/null +++ b/tests/e2e/models/configs/gemma-2-9b-it.yaml @@ -0,0 +1,11 @@ +model_name: "LLM-Research/gemma-2-9b-it" +hardware: "Atlas A2 Series" +tasks: +- name: "gsm8k" + metrics: + - name: "exact_match,strict-match" + value: 0.46 + - name: "exact_match,flexible-extract" + value: 0.79 +num_fewshot: 5 +gpu_memory_utilization: 0.8 diff --git a/tests/e2e/models/configs/gemma-3-4b-it.yaml b/tests/e2e/models/configs/gemma-3-4b-it.yaml new file mode 100644 index 00000000..42366800 --- /dev/null +++ b/tests/e2e/models/configs/gemma-3-4b-it.yaml @@ -0,0 +1,13 @@ +model_name: "LLM-Research/gemma-3-4b-it" +hardware: "Atlas A2 Series" +tasks: +- name: "gsm8k" + metrics: + - name: "exact_match,strict-match" + value: 0.59 + - name: "exact_match,flexible-extract" + value: 0.59 +num_fewshot: 5 +apply_chat_template: False +fewshot_as_multiturn: False +gpu_memory_utilization: 0.7 diff --git a/tests/e2e/models/configs/internlm-7b.yaml b/tests/e2e/models/configs/internlm-7b.yaml new file mode 100644 index 00000000..ceccc53d --- /dev/null +++ b/tests/e2e/models/configs/internlm-7b.yaml @@ -0,0 +1,13 @@ +model_name: "Shanghai_AI_Laboratory/internlm-7b" +hardware: "Atlas A2 Series" +tasks: +- name: "ceval-valid" + metrics: + - name: "acc,none" + value: 0.42 +num_fewshot: 5 +max_model_len: 2048 +trust_remote_code: True +dtype: "bfloat16" +apply_chat_template: False +fewshot_as_multiturn: False diff --git a/tests/e2e/models/configs/llava-1.5-7b-hf.yaml b/tests/e2e/models/configs/llava-1.5-7b-hf.yaml new file mode 100644 index 00000000..7bd69de9 --- /dev/null +++ b/tests/e2e/models/configs/llava-1.5-7b-hf.yaml @@ -0,0 +1,11 @@ +model_name: "llava-hf/llava-1.5-7b-hf" +hardware: "Atlas A2 Series" +model: 
"vllm-vlm" +tasks: +- name: "ceval-valid" + metrics: + - name: "acc,none" + value: 0.30 +trust_remote_code: True +gpu_memory_utilization: 0.8 +dtype: "bfloat16" diff --git a/tests/e2e/models/test_lm_eval_correctness.py b/tests/e2e/models/test_lm_eval_correctness.py index a0862b80..3d0ce6be 100644 --- a/tests/e2e/models/test_lm_eval_correctness.py +++ b/tests/e2e/models/test_lm_eval_correctness.py @@ -39,10 +39,11 @@ def env_config() -> EnvConfig: def build_model_args(eval_config, tp_size): trust_remote_code = eval_config.get("trust_remote_code", False) max_model_len = eval_config.get("max_model_len", 4096) + dtype = eval_config.get("dtype", "auto") model_args = { "pretrained": eval_config["model_name"], "tensor_parallel_size": tp_size, - "dtype": "auto", + "dtype": dtype, "trust_remote_code": trust_remote_code, "max_model_len": max_model_len, }