diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index 6086bbb..2eb2661 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -90,7 +90,6 @@ jobs: }} strategy: matrix: - vllm_use_version: [1] # the accuracy test will run: # 1. workflow_dispatch with models input # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base @@ -122,7 +121,7 @@ jobs: ) }} fail-fast: false - name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }} + name: ${{ matrix.model_name }} accuracy container: image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10 env: @@ -236,15 +235,14 @@ jobs: echo "vLLM: ${{ env.GHA_VLLM_VERSION }}" echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}" - - name: Run Accuracy Test for V${{ matrix.vllm_use_version }} + - name: Run Accuracy Test id: report working-directory: ./benchmarks env: PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 - VLLM_USE_V1: ${{ matrix.vllm_use_version }} run: | model_base_name=$(basename ${{ matrix.model_name }}) - markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}" + markdown_name="${model_base_name}" echo "markdown_name=$markdown_name" echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT mkdir -p ./accuracy @@ -259,7 +257,6 @@ jobs: --vllm_version "${{ env.GHA_VLLM_VERSION }}" \ --vllm_commit "${{ env.VLLM_COMMIT }}" \ --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \ - --vllm_use_v1 "$VLLM_USE_V1" - name: Generate step summary if: ${{ always() }} @@ -282,7 +279,7 @@ jobs: echo "contains_fail=false" >> $GITHUB_OUTPUT fi - - name: Upload Report for V${{ matrix.vllm_use_version }} + - name: Upload Report if: ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }} uses: actions/upload-artifact@v4 with: diff --git a/benchmarks/scripts/run_accuracy.py b/benchmarks/scripts/run_accuracy.py index e7cd5c6..35b59bf 100644 --- a/benchmarks/scripts/run_accuracy.py +++ b/benchmarks/scripts/run_accuracy.py @@ -168,7 +168,6 @@ def generate_md(model_name, tasks_list, args, datasets): **Software Environment**: CANN: {args.cann_version}, PyTorch: {args.torch_version}, torch-npu: {args.torch_npu_version} **Hardware Environment**: Atlas A2 Series **Datasets**: {datasets} -**vLLM Engine**: V{args.vllm_use_v1} **Parallel Mode**: {PARALLEL_MODE[model_name]} **Execution Mode**: {EXECUTION_MODE[model_name]} **Command**: @@ -310,6 +309,5 @@ if __name__ == "__main__": parser.add_argument("--cann_version", type=str, required=False) parser.add_argument("--vllm_commit", type=str, required=False) parser.add_argument("--vllm_ascend_commit", type=str, required=False) - parser.add_argument("--vllm_use_v1", type=str, required=False) args = parser.parse_args() main(args)