vllm-ascend vnpu v1

2025-12-26 07:37:35 +00:00
parent 2f1aed98cc
commit 135cc0a505
168 changed files with 28337 additions and 9 deletions
--- a/.github.backup/workflows/_accuracy_test.yaml
+++ b/.github.backup/workflows/_accuracy_test.yaml
@@ -0,0 +1,175 @@
+name: 'accuracy test'
+
+on:
+  workflow_call:  # reusable workflow: started via `uses:` from another workflow
+    inputs:
+      vllm:  # ref used for the vllm-project/vllm checkout
+        required: true
+        type: string
+      vllm-ascend:  # vllm-ascend ref to test; "latest" is resolved to the newest v* tag
+        required: false
+        type: string
+        default: main
+      runner:  # runner label, used verbatim for `runs-on`
+        required: true
+        type: string
+      image:  # container image reference supplied by the caller
+        required: true
+        type: string
+      model_name:  # selects tests/e2e/models/configs/<model_name>.yaml and names the report
+        required: true
+        type: string
+      upload:  # when true, the generated report is uploaded as an artifact
+        required: false
+        type: boolean
+        default: false
+
+jobs:
+  accuracy_tests:
+    # One accuracy run for a single model, executed inside the caller-selected container.
+    runs-on: ${{ inputs.runner }}
+    name: ${{ inputs.model_name }} accuracy
+    container:
+      image: ${{ inputs.image }}  # honor the required `image` input rather than a pinned tag
+      env:
+        VLLM_USE_MODELSCOPE: True
+        # 1. If a version is specified (workflow_dispatch), run the accuracy test on that ref
+        # 2. If no version is given (labeled PR), run the accuracy test on the default ref:
+        # The branch, tag or SHA to checkout. When checking out the repository that
+        # triggered a workflow, this defaults to the reference or SHA for that event.
+        # Otherwise, uses the default branch.
+        GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
+
+    steps:
+      - name: Checkout repository  # no `with:` block, so actions/checkout runs with its defaults
+        uses: actions/checkout@v4
+
+      - name: Set model name as output  # publishes steps.set_output.outputs.model_name
+        id: set_output
+        run: |
+          echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
+
+      - name: Config mirrors  # repoint apt and pip at the in-cluster cache service, then install git
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Install system dependencies  # reads packages.txt from the working directory (presumably the checkout above)
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo  # upstream vLLM at the requested ref, into ./vllm-empty
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source  # editable install with VLLM_TARGET_DEVICE=empty
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Resolve vllm-ascend version  # "latest" -> newest v* tag (fallback: main); --refs keeps peeled "^{}" tag entries out
+        run: |
+          VERSION_INPUT="${{ inputs.vllm-ascend }}"
+
+          if [[ "$VERSION_INPUT" == "latest" ]]; then
+            TAGS=$(git ls-remote --refs --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
+            LATEST_TAG=$(echo "$TAGS" | head -n1)
+            if [[ -z "$LATEST_TAG" ]]; then
+              RESOLVED_VERSION="main"
+            else
+              RESOLVED_VERSION="$LATEST_TAG"
+            fi
+          else
+            RESOLVED_VERSION="$VERSION_INPUT"
+          fi
+          echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> "$GITHUB_ENV"
+
+      - name: Checkout vllm-project/vllm-ascend repo  # vllm-ascend at the resolved ref, into ./vllm-ascend
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm-ascend
+          path: ./vllm-ascend
+          ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
+
+      - name: Install vllm-project/vllm-ascend  # dev requirements first, then a verbose editable install
+        working-directory: ./vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Get vLLM commit hash and URL  # exports only the short hash as VLLM_COMMIT; no URL is built here
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
+          echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
+
+      - name: Get vLLM-Ascend commit hash and URL  # exports only the short hash as VLLM_ASCEND_COMMIT
+        working-directory: ./vllm-ascend
+        run: |
+          VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
+          echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
+
+      - name: Collect version info  # appends GHA_CANN_VERSION, GHA_TORCH_VERSION, GHA_TORCH_NPU_VERSION, GHA_VLLM_VERSION to GITHUB_ENV
+        run: |
+          for dir in /usr/local/Ascend/ascend-toolkit/*; do
+            dname=$(basename "$dir")
+            if [ "$dname" != "latest" ]; then
+              TOOLKIT_DIR="$dname"
+              break
+            fi
+          done
+          INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
+          GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
+                           | head -n1 \
+                           | cut -d'=' -f2 \
+                           | tr -d '"')
+          {
+            echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
+            pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
+            pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
+            pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
+          } >> "$GITHUB_ENV"
+
+      - name: Run accuracy test  # runs pytest against configs/<model_name>.yaml
+        id: report  # steps.report.outputs.markdown_name is read by the two steps below
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
+          VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
+          VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}  # falls back to github.ref when the env value is empty
+          VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
+          CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
+          TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
+          TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
+        run: |
+          model_base_name=$(basename ${{ inputs.model_name }})
+          markdown_name="${model_base_name}"
+          echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
+          mkdir -p ./benchmarks/accuracy
+          pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
+          --config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
+
+      - name: Generate step summary  # appends ./benchmarks/accuracy/<markdown_name>.md to the job summary
+        if: ${{ always() }}  # also runs when an earlier step failed
+        run: |
+          cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
+
+      - name: Upload Report  # only when the caller sets `upload: true`
+        if: ${{ inputs.upload == true }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
+          path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
+          if-no-files-found: warn
+          retention-days: 90
+          overwrite: true
--- a/.github.backup/workflows/_e2e_test.yaml
+++ b/.github.backup/workflows/_e2e_test.yaml
@@ -0,0 +1,199 @@
+name: 'e2e test'
+
+on:
+  workflow_call:  # reusable workflow: started via `uses:` from another workflow
+    inputs:
+      vllm:  # ref used for the vllm-project/vllm checkout
+        required: true
+        type: string
+      runner:  # runner label prefix; the jobs append "-1" (singlecard) or "-2" (multicard)
+        required: true
+        type: string
+      image:  # container image used by both jobs
+        required: true
+        type: string
+      type:  # 'light' or 'full'; any other value leaves every test step skipped
+        required: true
+        type: string
+
+jobs:
+  e2e:
+    name: singlecard
+    runs-on: ${{ inputs.runner }}-1  # caller's runner label with a "-1" suffix
+    container:
+      image: ${{ inputs.image }}
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info  # prints `npu-smi info` and the toolkit install info file
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors  # repoint apt and pip at the in-cluster cache service, then install git
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo  # no `with:` block, so actions/checkout runs with its defaults
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies  # reads packages.txt from the working directory
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo  # upstream vLLM at the requested ref, into ./vllm-empty
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1  # shallow clone
+
+      - name: Install vllm-project/vllm from source  # editable install with VLLM_TARGET_DEVICE=empty
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend  # runs in the default working directory, not in ./vllm-empty
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test (light)  # reduced singlecard subset
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+        if: ${{ inputs.type == 'light' }}
+        run: |
+          pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_quantization.py
+          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
+
+      - name: Run vllm-project/vllm-ascend test (full)  # complete singlecard suite, one pytest process per file
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+        if: ${{ inputs.type == 'full' }}
+        run: |
+          # Running the aclgraph tests in one batch causes an AclmdlRICaptureBegin error,
+          # so each test file is run in its own pytest invocation.
+
+          pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
+          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
+          pytest -sv tests/e2e/singlecard/test_bge_model.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_chunked.py
+          pytest -sv tests/e2e/singlecard/test_embedding.py
+          pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
+          pytest -sv tests/e2e/singlecard/test_quantization.py
+          pytest -sv tests/e2e/singlecard/test_sampler.py
+          pytest -sv tests/e2e/singlecard/test_vlm.py
+
+          # ------------------------------------ v1 spec decode test ------------------------------------ #
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
+          # FIXME: test_eagle_correctness hits an OOM error
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+
+          pytest -sv tests/e2e/singlecard/ops/
+
+  e2e-2-cards:
+    name: multicard
+    runs-on: ${{ inputs.runner }}-2  # caller's runner label with a "-2" suffix
+    container:
+      image: ${{ inputs.image }}
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info  # prints `npu-smi info` and the toolkit install info file
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors  # repoint apt and pip at the in-cluster cache service, then install git
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo  # no `with:` block, so actions/checkout runs with its defaults
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies  # reads packages.txt from the working directory
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo  # upstream vLLM at the requested ref, into ./vllm-empty
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1  # shallow clone
+
+      - name: Install vllm-project/vllm from source  # editable install with VLLM_TARGET_DEVICE=empty
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend  # runs in the default working directory, not in ./vllm-empty
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test (light)  # a single multicard test case
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        if: ${{ inputs.type == 'light' }}
+        run: |
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
+
+      - name: Run vllm-project/vllm-ascend test (full)  # complete multicard suite
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        if: ${{ inputs.type == 'full' }}
+        run: |
+          pytest -sv tests/e2e/multicard/test_data_parallel.py
+          pytest -sv tests/e2e/multicard/test_expert_parallel.py
+          pytest -sv tests/e2e/multicard/test_external_launcher.py
+          pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
+          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+
+          # To avoid oom, we need to run the test in a single process.
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_new_version
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_old_version
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
+
+          pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
+          pytest -sv tests/e2e/multicard/test_prefix_caching.py
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
+          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
--- a/.github.backup/workflows/accuracy_test.yaml
+++ b/.github.backup/workflows/accuracy_test.yaml
@@ -0,0 +1,72 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+# This test will be triggered:
+# - PR labeled with: 'accuracy-test' & 'ready-for-test'
+name: ascend test / accuracy
+
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    types: [ labeled, synchronize ]  # label added or new commits pushed; the job-level `if` checks the labels
+
+# Bash shells do not read ~/.profile or ~/.bashrc, so steps that need an activated
+# environment must explicitly declare "shell: bash -el {0}".
+# This is used to activate the ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow name and ref
+  cancel-in-progress: true
+
+jobs:
+  run:
+    name: ""
+    strategy:
+      matrix:
+        # Only top-series models should be listed here
+        include:
+          - runner: a2-1  # becomes linux-aarch64-a2-1 via `with.runner` below
+            model_name: Qwen3-8B
+          - runner: a2-1
+            model_name: Qwen2.5-VL-7B-Instruct
+          - runner: a2-1
+            model_name: Qwen2-Audio-7B-Instruct
+          - runner: a2-2  # becomes linux-aarch64-a2-2 via `with.runner` below
+            model_name: Qwen3-30B-A3B
+          - runner: a2-2
+            model_name: Qwen3-VL-30B-A3B-Instruct
+          - runner: a2-2
+            model_name: DeepSeek-V2-Lite
+      fail-fast: false
+    # Runs only when the PR carries both the 'accuracy-test' and 'ready-for-test' labels
+    if:  >-
+      ${{
+      contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
+      contains(github.event.pull_request.labels.*.name, 'ready-for-test')
+      }}
+    uses: ./.github/workflows/_accuracy_test.yaml
+    with:
+      vllm: v0.11.0
+      runner:  linux-aarch64-${{ matrix.runner }}
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      model_name: ${{ matrix.model_name }}
--- a/.github.backup/workflows/format_pr_body.yaml
+++ b/.github.backup/workflows/format_pr_body.yaml
@@ -0,0 +1,57 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: format / pr body
+
+on:
+  # Runs when a PR is opened and whenever new commits are pushed to it
+  pull_request_target:
+    types: [opened, synchronize]
+    branches:
+      - 'main'
+
+permissions:
+  pull-requests: write  # presumably required by the script that updates the PR description
+
+jobs:
+  update-description:
+    name: update vLLM version
+    runs-on: ubuntu-latest
+
+    steps:
+
+      - name: Get vLLM version  # exports VLLM_COMMIT as a commit URL built from the pinned ref v0.11.0
+        run: |
+          VLLM_COMMIT=v0.11.0
+          echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
+
+      - name: Checkout repository  # no `ref:` given, so actions/checkout uses its default for this event
+        uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
+
+      - name: Set up Python
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+
+      - name: Get vLLM release version  # reads .ci_vllm_version from the JSON that docs/source/conf.py prints
+        run: |
+          VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
+          echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
+
+      - name: Update PR description  # passes the PR number, vLLM version and commit URL to the helper script
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"
--- a/.github.backup/workflows/image_310p_openeuler.yml
+++ b/.github.backup/workflows/image_310p_openeuler.yml
@@ -0,0 +1,135 @@
+name: 'image / openEuler / 310p'
+# This is a docker build check and publish job:
+# 1. PR Triggered docker image build check
+#   - is for image build check
+#   - Enable on main/*-dev branch
+#   - push: ${{ github.event_name != 'pull_request' }} ==> false
+# 2. branches push trigger image publish
+#   - is for branch/dev/nightly image
+#   - commits merged into main/*-dev  ==> vllm-ascend:main-310p-openeuler / vllm-ascend:*-dev-310p-openeuler
+# 3. tags push trigger image publish
+#   - is for final release image
+#   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3-310p-openeuler / vllm-ascend:v1.2.3rc1-310p-openeuler
+on:
+  pull_request:  # build-only check for PRs targeting main/*-dev
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/image_310p_openeuler.yml'
+      - 'Dockerfile.310p.openEuler'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+    types: [ labeled ]  # only label events start a run; the job-level `if` then checks which labels are set
+  push:
+    # Publish the image on tag pushes; the Dockerfile at the tag is what gets built
+    branches:
+      - 'main'
+      - '*-dev'
+    tags:
+      - 'v*'
+    paths:
+      - '.github/workflows/image_310p_openeuler.yml'
+      - 'Dockerfile.310p.openEuler'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow name and ref
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: vllm-ascend image build
+    # Runner choice: pushes in the vllm-project org use ubuntu-latest; every other run uses ubuntu-24.04-arm
+    # Job runs on a push event, or on a PR carrying both the 'ready' and 'ready-for-test' labels
+    runs-on: >-
+      ${{
+          github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+          'ubuntu-latest' ||
+          'ubuntu-24.04-arm'
+      }}
+    if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # full history
+        persist-credentials: false
+
+    - name: Print  # logs the runner's CPU details
+      run: |
+        lscpu
+
+    - name: Docker meta  # computes image tags/labels, exposed as steps.meta.outputs.*
+      id: meta
+      uses: docker/metadata-action@v5
+      with:
+        # TODO(yikun): add more hub image and a note on release policy for container image
+        images: |
+          quay.io/ascend/vllm-ascend
+        # Note for test case
+        # https://github.com/marketplace/actions/docker-metadata-action#typeref
+        # 1. branch job publishes per main/*-dev branch commit
+        # 2. main and dev pull_request is build only, so the tag pr-N-310p-openeuler is fine
+        # 3. only pep440 matched tag will be published:
+        #    - v0.7.1 --> v0.7.1-310p-openeuler
+        #    - pre/post/dev: v0.7.1rc1-310p-openeuler/v0.7.1rc1.dev1-310p-openeuler/v0.7.1.post1-310p-openeuler, no latest
+        #      which follow the rule from vLLM with prefix v
+        # TODO(yikun): the post release might be considered as latest release
+        tags: |
+          type=ref,event=branch,suffix=-310p-openeuler
+          type=ref,event=pr,suffix=-310p-openeuler
+          type=pep440,pattern={{raw}},suffix=-310p-openeuler
+        flavor: |
+          latest=false
+
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    - name: Build - Set up QEMU
+      uses: docker/setup-qemu-action@v3
+
+    - name: Build - Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Publish - Login to Quay Container Registry  # skipped for PRs and for pushes outside the vllm-project org
+      if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+      uses: docker/login-action@v3
+      with:
+        registry: quay.io
+        username: ${{ vars.QUAY_USERNAME }}
+        password: ${{ secrets.QUAY_PASSWORD }}
+
+    - name: Build and push 310p  # org pushes build amd64+arm64 and publish; all other runs build arm64 only, without pushing
+      uses: docker/build-push-action@v6
+      with:
+        platforms: >-
+          ${{
+              github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+              'linux/amd64,linux/arm64' ||
+              'linux/arm64'
+          }}
+        # use the current repo path as the build context, ensure .git is contained
+        context: .
+        # push only for tag or branch pushes inside the vllm-project org
+        push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+        labels: ${{ steps.meta.outputs.labels }}
+        tags: ${{ steps.meta.outputs.tags }}
+        file: Dockerfile.310p.openEuler
+        build-args: |
+          PIP_INDEX_URL=https://pypi.org/simple
+        provenance: false
--- a/.github.backup/workflows/image_310p_ubuntu.yml
+++ b/.github.backup/workflows/image_310p_ubuntu.yml
@@ -0,0 +1,131 @@
+name: 'image / Ubuntu / 310p'
+# This is a docker build check and publish job:
+# 1. PR Triggered docker image build check
+#   - is for image build check
+#   - Enable on main/*-dev branch
+#   - push: ${{ github.event_name != 'pull_request' }} ==> false
+# 2. branches push trigger image publish
+#   - is for branch/dev/nightly image
+#   - commits merged into main/*-dev  ==> vllm-ascend:main-310p / vllm-ascend:*-dev-310p
+# 3. tags push trigger image publish
+#   - is for final release image
+#   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3-310p / vllm-ascend:v1.2.3rc1-310p
+on:
+  pull_request:  # build-only check for PRs targeting main/*-dev
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/image_310p_ubuntu.yml'
+      - 'Dockerfile.310p'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+    types: [ labeled ]  # only label events start a run; the job-level `if` then checks which labels are set
+  push:
+    # Publish the image on tag pushes; the Dockerfile at the tag is what gets built
+    branches:
+      - 'main'
+      - '*-dev'
+    tags:
+      - 'v*'
+    paths:
+      - '.github/workflows/image_310p_ubuntu.yml'
+      - 'Dockerfile.310p'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow name and ref
+  cancel-in-progress: true
+
+jobs:
+
+  build:
+    name: vllm-ascend image build
+    # Always runs on ubuntu-latest; the target platforms are chosen in the build step
+    # Job runs on a push event, or on a PR carrying both the 'ready' and 'ready-for-test' labels
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # full history
+        persist-credentials: false
+
+    - name: Print  # logs the runner's CPU details
+      run: |
+        lscpu
+
+    - name: Docker meta  # computes image tags/labels, exposed as steps.meta.outputs.*
+      id: meta
+      uses: docker/metadata-action@v5
+      with:
+        # TODO(yikun): add more hub image and a note on release policy for container image
+        images: |
+          quay.io/ascend/vllm-ascend
+        # Note for test case
+        # https://github.com/marketplace/actions/docker-metadata-action#typeref
+        # 1. branch job publishes per main/*-dev branch commit
+        # 2. main and dev pull_request is build only, so the tag pr-N is fine
+        # 3. only pep440 matched tag will be published:
+        #    - v0.7.1 --> v0.7.1-310p
+        #    - pre/post/dev: v0.7.1rc1-310p/v0.7.1rc1.dev1-310p/v0.7.1.post1-310p, no latest
+        #      which follow the rule from vLLM with prefix v
+        # TODO(yikun): the post release might be considered as latest release
+        tags: |
+          type=ref,event=branch,suffix=-310p
+          type=ref,event=pr,suffix=-310p
+          type=pep440,pattern={{raw}},suffix=-310p
+        flavor: |
+          latest=false
+
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    - name: Build - Set up QEMU
+      uses: docker/setup-qemu-action@v3
+
+    - name: Build - Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Publish - Login to Quay Container Registry  # skipped for PRs and for pushes outside the vllm-project org
+      if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+      uses: docker/login-action@v3
+      with:
+        registry: quay.io
+        username: ${{ vars.QUAY_USERNAME }}
+        password: ${{ secrets.QUAY_PASSWORD }}
+
+    - name: Build and push 310p  # org pushes build amd64+arm64 and publish; all other runs build amd64 only, without pushing
+      uses: docker/build-push-action@v6
+      with:
+        platforms: >-
+          ${{
+              github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+              'linux/amd64,linux/arm64' ||
+              'linux/amd64'
+          }}
+        # use the current repo path as the build context, ensure .git is contained
+        context: .
+        file: Dockerfile.310p
+        # push only for tag or branch pushes inside the vllm-project org
+        push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+        labels: ${{ steps.meta.outputs.labels }}
+        tags: ${{ steps.meta.outputs.tags }}
+        build-args: |
+          PIP_INDEX_URL=https://pypi.org/simple
+        provenance: false
--- a/.github.backup/workflows/image_a3_openeuler.yml
+++ b/.github.backup/workflows/image_a3_openeuler.yml
@@ -0,0 +1,135 @@
+name: 'image / openEuler / a3'
+# This is a docker build check and publish job:
+# 1. PR Triggered docker image build check
+#   - is for image build check
+#   - Enable on main/*-dev branch
+#   - push: ${{ github.event_name != 'pull_request' }} ==> false
+# 2. branches push trigger image publish
+#   - is for branch/dev/nightly image
+#   - commits merged into main/*-dev  ==> vllm-ascend:main-a3-openeuler / vllm-ascend:*-dev-a3-openeuler
+# 3. tags push trigger image publish
+#   - is for final release image
+#   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3-a3-openeuler / vllm-ascend:v1.2.3rc1-a3-openeuler
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/image_a3_openeuler.yml'
+      - 'Dockerfile.a3.openEuler'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+    types: [ labeled ]
+  push:
+    # Publish image when tagging; the Dockerfile at the tag will be built as the tag image
+    branches:
+      - 'main'
+      - '*-dev'
+    tags:
+      - 'v*'
+    paths:
+      - '.github/workflows/image_a3_openeuler.yml'
+      - 'Dockerfile.a3.openEuler'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: vllm-ascend image build
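+    # Publish runs (push events in vllm-project) use the amd64 ubuntu-latest runner and build
+    # linux/amd64 + linux/arm64; every other run uses the arm64 runner and builds linux/arm64 only.
+    # The job runs on push events, or on PRs with both 'ready' and 'ready-for-test' labels.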
+    runs-on: >-
+      ${{
+          github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+          'ubuntu-latest' ||
+          'ubuntu-24.04-arm'
+      }}
+    if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        persist-credentials: false
+
+    - name: Print
+      run: |
+        lscpu
+    - name: Docker meta
+      id: meta
+      uses: docker/metadata-action@v5
+      with:
+        # TODO(yikun): add more hub image and a note on release policy for container image
+        images: |
+          quay.io/ascend/vllm-ascend
+        # Note for test case
+        # https://github.com/marketplace/actions/docker-metadata-action#typeref
+        # 1. branch job publishes per main/*-dev branch commits
+        # 2. main and dev pull_request is build only, so the tag pr-N-a3-openeuler is fine
+        # 3. only pep440 matched tag will be published:
+        #    - v0.7.1 --> v0.7.1-a3-openeuler
+        #    - pre/post/dev: v0.7.1rc1-a3-openeuler/v0.7.1rc1-a3-openeuler/v0.7.1rc1.dev1-a3-openeuler/v0.7.1.post1-a3-openeuler, no latest
+        #      which follow the rule from vLLM with prefix v
+        # TODO(yikun): the post release might be considered as latest release
+        tags: |
+          type=ref,event=branch,suffix=-a3-openeuler
+          type=ref,event=pr,suffix=-a3-openeuler
+          type=pep440,pattern={{raw}},suffix=-a3-openeuler
+        flavor:
+          latest=false
+
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    - name: Build - Set up QEMU
+      uses: docker/setup-qemu-action@v3
+
+    - name: Build - Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Publish - Login to Quay Container Registry
+      if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+      uses: docker/login-action@v3
+      with:
+        registry: quay.io
+        username: ${{ vars.QUAY_USERNAME }}
+        password: ${{ secrets.QUAY_PASSWORD }}
+
+    - name: Build and push a3
+      uses: docker/build-push-action@v6
+      with:
+        platforms: >-
+          ${{
+              github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+              'linux/amd64,linux/arm64' ||
+              'linux/arm64'
+          }}
+        # use the current repo path as the build context, ensure .git is contained
+        context: .
+        # only trigger when tag, branch/main push
+        push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+        labels: ${{ steps.meta.outputs.labels }}
+        tags: ${{ steps.meta.outputs.tags }}
+        file: Dockerfile.a3.openEuler
+        build-args: |
+          PIP_INDEX_URL=https://pypi.org/simple
+        provenance: false
+
--- a/.github.backup/workflows/image_a3_ubuntu.yml
+++ b/.github.backup/workflows/image_a3_ubuntu.yml
@@ -0,0 +1,131 @@
+name: 'image / Ubuntu / a3'
+# This is a docker build check and publish job:
+# 1. PR Triggered docker image build check
+#   - is for image build check
+#   - Enable on main/*-dev branch
+#   - push: ${{ github.event_name != 'pull_request' }} ==> false
+# 2. branches push trigger image publish
+#   - is for branch/dev/nightly image
+#   - commits merged into main/*-dev  ==> vllm-ascend:main-a3 / vllm-ascend:*-dev-a3
+# 3. tags push trigger image publish
+#   - is for final release image
+#   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3-a3|vllm-ascend:v1.2.3rc1-a3
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/image_a3_ubuntu.yml'
+      - 'Dockerfile.a3'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+    types: [ labeled ]
+  push:
+    # Publish image when tagging; the Dockerfile at the tag will be built as the tag image
+    branches:
+      - 'main'
+      - '*-dev'
+    tags:
+      - 'v*'
+    paths:
+      - '.github/workflows/image_a3_ubuntu.yml'
+      - 'Dockerfile.a3'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  build:
+    name: vllm-ascend image build
+    # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
+    # Push event or PR with both 'ready' and 'ready-for-test' labels
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        persist-credentials: false
+
+    - name: Print
+      run: |
+        lscpu
+    - name: Docker meta
+      id: meta
+      uses: docker/metadata-action@v5
+      with:
+        # TODO(yikun): add more hub image and a note on release policy for container image
+        images: |
+          quay.io/ascend/vllm-ascend
+        # Note for test case
+        # https://github.com/marketplace/actions/docker-metadata-action#typeref
+        # 1. branch job publishes per main/*-dev branch commits
+        # 2. main and dev pull_request is build only, so the tag pr-N-a3 is fine
+        # 3. only pep440 matched tag will be published:
+        #    - v0.7.1 --> v0.7.1-a3
+        #    - pre/post/dev: v0.7.1rc1-a3/v0.7.1rc1-a3/v0.7.1rc1.dev1-a3/v0.7.1.post1-a3, no latest
+        #      which follow the rule from vLLM with prefix v
+        # TODO(yikun): the post release might be considered as latest release
+        tags: |
+          type=ref,event=branch,suffix=-a3
+          type=ref,event=pr,suffix=-a3
+          type=pep440,pattern={{raw}},suffix=-a3
+        flavor:
+          latest=false
+
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    - name: Build - Set up QEMU
+      uses: docker/setup-qemu-action@v3
+
+    - name: Build - Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Publish - Login to Quay Container Registry
+      if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+      uses: docker/login-action@v3
+      with:
+        registry: quay.io
+        username: ${{ vars.QUAY_USERNAME }}
+        password: ${{ secrets.QUAY_PASSWORD }}
+
+    - name: Build and push a3
+      uses: docker/build-push-action@v6
+      with:
+        platforms: >-
+          ${{
+              github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+              'linux/amd64,linux/arm64' ||
+              'linux/amd64'
+          }}
+        # use the current repo path as the build context, ensure .git is contained
+        context: .
+        file: Dockerfile.a3
+        # only trigger when tag, branch/main push
+        push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+        labels: ${{ steps.meta.outputs.labels }}
+        tags: ${{ steps.meta.outputs.tags }}
+        build-args: |
+          PIP_INDEX_URL=https://pypi.org/simple
+        provenance: false
+
--- a/.github.backup/workflows/image_openeuler.yml
+++ b/.github.backup/workflows/image_openeuler.yml
@@ -0,0 +1,134 @@
+name: 'image / openEuler'
+# This is a docker build check and publish job:
+# 1. PR Triggered docker image build check
+#   - is for image build check
+#   - Enable on main/*-dev branch
+#   - push: ${{ github.event_name != 'pull_request' }} ==> false
+# 2. branches push trigger image publish
+#   - is for branch/dev/nightly image
+#   - commits merged into main/*-dev  ==> vllm-ascend:main-openeuler / vllm-ascend:*-dev-openeuler
+# 3. tags push trigger image publish
+#   - is for final release image
+#   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3-openeuler / vllm-ascend:v1.2.3rc1-openeuler
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/image_openeuler.yml'
+      - 'Dockerfile.openEuler'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+    types: [ labeled ]
+  push:
+    # Publish image when tagging; the Dockerfile at the tag will be built as the tag image
+    branches:
+      - 'main'
+      - '*-dev'
+    tags:
+      - 'v*'
+    paths:
+      - '.github/workflows/image_openeuler.yml'
+      - 'Dockerfile.openEuler'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: vllm-ascend image build
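+    # Publish runs (push events in vllm-project) use the amd64 ubuntu-latest runner and build
+    # linux/amd64 + linux/arm64; every other run uses the arm64 runner and builds linux/arm64 only.
+    # The job runs on push events, or on PRs with both 'ready' and 'ready-for-test' labels.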
+    runs-on: >-
+      ${{
+          github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+          'ubuntu-latest' ||
+          'ubuntu-24.04-arm'
+      }}
+    if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        persist-credentials: false
+
+    - name: Print
+      run: |
+        lscpu
+
+    - name: Docker meta
+      id: meta
+      uses: docker/metadata-action@v5
+      with:
+        # TODO(yikun): add more hub image and a note on release policy for container image
+        images: |
+          quay.io/ascend/vllm-ascend
+        # Note for test case
+        # https://github.com/marketplace/actions/docker-metadata-action#typeref
+        # 1. branch job publishes per main/*-dev branch commits
+        # 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
+        # 3. only pep440 matched tag will be published:
+        #    - v0.7.1 --> v0.7.1-openeuler
+        #    - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest
+        #      which follow the rule from vLLM with prefix v
+        # TODO(yikun): the post release might be considered as latest release
+        tags: |
+          type=ref,event=branch,suffix=-openeuler
+          type=ref,event=pr,suffix=-openeuler
+          type=pep440,pattern={{raw}},suffix=-openeuler
+        # The -openeuler suffix is set per tag rule, not in `flavor`, so a
+        # generated `latest` tag would carry no suffix at all. Keep it disabled
+        # (as the "no latest" note above states) so this workflow never
+        # publishes a bare `latest` tag for the openEuler image.
+        flavor:
+          latest=false
+
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    - name: Build - Set up QEMU
+      uses: docker/setup-qemu-action@v3
+
+    - name: Build - Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Publish - Login to Quay Container Registry
+      if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+      uses: docker/login-action@v3
+      with:
+        registry: quay.io
+        username: ${{ vars.QUAY_USERNAME }}
+        password: ${{ secrets.QUAY_PASSWORD }}
+
+    - name: Build and push 910b
+      uses: docker/build-push-action@v6
+      with:
+        platforms: >-
+          ${{
+              github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+              'linux/amd64,linux/arm64' ||
+              'linux/arm64'
+          }}
+        # use the current repo path as the build context, ensure .git is contained
+        context: .
+        # only trigger when tag, branch/main push
+        push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+        labels: ${{ steps.meta.outputs.labels }}
+        tags: ${{ steps.meta.outputs.tags }}
+        file: Dockerfile.openEuler
+        build-args: |
+          PIP_INDEX_URL=https://pypi.org/simple
+        provenance: false
--- a/.github.backup/workflows/image_ubuntu.yml
+++ b/.github.backup/workflows/image_ubuntu.yml
@@ -0,0 +1,131 @@
+name: 'image / Ubuntu'
+# This is a docker build check and publish job:
+# 1. PR Triggered docker image build check
+#   - is for image build check
+#   - Enable on main/*-dev branch
+#   - push: ${{ github.event_name != 'pull_request' }} ==> false
+# 2. branches push trigger image publish
+#   - is for branch/dev/nightly image
+#   - commits merged into main/*-dev  ==> vllm-ascend:main / vllm-ascend:*-dev
+# 3. tags push trigger image publish
+#   - is for final release image
+#   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/image_ubuntu.yml'
+      - 'Dockerfile'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+    types: [ labeled ]
+  push:
+    # Publish image when tagging; the Dockerfile at the tag will be built as the tag image
+    branches:
+      - 'main'
+      - '*-dev'
+    tags:
+      - 'v*'
+    paths:
+      - '.github/workflows/image_ubuntu.yml'
+      - 'Dockerfile'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+
+# only cancel in-progress runs of the same workflow
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  build:
+    name: vllm-ascend image build
+    # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
+    # Push event or PR with both 'ready' and 'ready-for-test' labels
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        persist-credentials: false
+
+    - name: Print
+      run: |
+        lscpu
+
+    - name: Docker meta
+      id: meta
+      uses: docker/metadata-action@v5
+      with:
+        # TODO(yikun): add more hub image and a note on release policy for container image
+        images: |
+          quay.io/ascend/vllm-ascend
+        # Tag rules (see
+        # https://github.com/marketplace/actions/docker-metadata-action#typeref):
+        # 1. branch builds are published for every main/*-dev branch commit
+        # 2. main and dev pull requests are build-only, so the tag pr-N is fine
+        # 3. only tags matching pep440 are published:
+        #    - v0.7.1 --> v0.7.1, latest
+        #    - pre/post/dev: v0.7.1rc1/v0.7.1rc1.dev1/v0.7.1.post1, no latest
+        #      (follows the vLLM rule, with the v prefix)
+        # TODO(yikun): the post release might be considered as latest release
+        tags: |
+          type=ref,event=branch
+          type=ref,event=pr
+          type=pep440,pattern={{raw}}
+        # NOTE(review): `latest=true` requests the `latest` tag explicitly, which
+        # looks broader than the "no latest" case described above - confirm intent.
+        flavor: latest=true
+
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    - name: Build - Set up QEMU
+      uses: docker/setup-qemu-action@v3
+
+    - name: Build - Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Publish - Login to Quay Container Registry
+      if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+      uses: docker/login-action@v3
+      with:
+        registry: quay.io
+        username: ${{ vars.QUAY_USERNAME }}
+        password: ${{ secrets.QUAY_PASSWORD }}
+
+    - name: Build and push 910b
+      uses: docker/build-push-action@v6
+      with:
+        platforms: >-
+          ${{
+              github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
+              'linux/amd64,linux/arm64' ||
+              'linux/amd64'
+          }}
+        # use the current repo path as the build context, ensure .git is contained
+        context: .
+        file: Dockerfile
+        # only trigger when tag, branch/main push
+        push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
+        labels: ${{ steps.meta.outputs.labels }}
+        tags: ${{ steps.meta.outputs.tags }}
+        build-args: |
+          PIP_INDEX_URL=https://pypi.org/simple
+        provenance: false
--- a/.github.backup/workflows/label_merge_conflict.yml
+++ b/.github.backup/workflows/label_merge_conflict.yml
@@ -0,0 +1,20 @@
+name: "Merge Conflict Labeler"
+on:
+  # So that PRs touching the same files as the push are updated
+  push:
+  # So that the `dirtyLabel` is removed if conflicts are resolved
+  # We recommend `pull_request_target` so that github secrets are available.
+  # In `pull_request` we wouldn't be able to change labels of fork PRs
+  pull_request_target:
+    types: [synchronize]
+
+jobs:
+  main:
+    runs-on: ubuntu-latest
+    steps:
+      - name: check if prs are dirty
+        uses: eps1lon/actions-label-merge-conflict@v3
+        with:
+          dirtyLabel: "merge-conflicts"
+          repoToken: "${{ secrets.GITHUB_TOKEN }}"
+          commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."
--- a/.github.backup/workflows/labeler.yml
+++ b/.github.backup/workflows/labeler.yml
@@ -0,0 +1,18 @@
+name: Pull Request Labeler
+
+on: pull_request_target
+
+jobs:
+  label:
+    name: Label
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Label the PR
+        uses: actions/labeler@v6
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          configuration-path: .github/labeler.yml
+          sync-labels: true
--- a/.github.backup/workflows/matchers/actionlint.json
+++ b/.github.backup/workflows/matchers/actionlint.json
@@ -0,0 +1,17 @@
+{
+  "problemMatcher": [
+    {
+      "owner": "actionlint",
+      "pattern": [
+        {
+          "regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$",
+          "file": 1,
+          "line": 2,
+          "column": 3,
+          "message": 4,
+          "code": 5
+        }
+      ]
+    }
+  ]
+}
--- a/.github.backup/workflows/matchers/mypy.json
+++ b/.github.backup/workflows/matchers/mypy.json
@@ -0,0 +1,16 @@
+{
+  "problemMatcher": [
+    {
+      "owner": "mypy",
+      "pattern": [
+        {
+          "regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$",
+          "file": 1,
+          "line": 2,
+          "severity": 3,
+          "message": 4
+        }
+      ]
+    }
+  ]
+}
--- a/.github.backup/workflows/matchers/ruff.json
+++ b/.github.backup/workflows/matchers/ruff.json
@@ -0,0 +1,17 @@
+{
+    "problemMatcher": [
+      {
+        "owner": "ruff",
+        "pattern": [
+          {
+            "regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
+            "file": 1,
+            "line": 2,
+            "column": 3,
+            "code": 4,
+            "message": 5
+          }
+        ]
+      }
+    ]
+  }
--- a/.github.backup/workflows/multi_node_test.yaml
+++ b/.github.backup/workflows/multi_node_test.yaml
@@ -0,0 +1,118 @@
+name: 'e2e test / multi-dp'
+
+on:
+    schedule:
+      - cron: "0 */4 * * *"
+    workflow_dispatch:
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# only cancel in-progress runs of the same workflow
+# and ignore the lint / 8 cards test type
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    # This is a runner with no NPU for k8s controller
+    runs-on: linux-aarch64-a3-0
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
+      env:
+        KUBECONFIG: /tmp/kubeconfig
+        KUBECTL: /root/.cache/.kube/kubectl
+        NAMESPACE: vllm-project
+        LEADER_POD: vllm-0
+    steps:
+        - name: Install system dependencies
+          run: |
+           # Point apt and pip at the Tsinghua mirrors.
+           sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+           pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+           apt-get update -y && apt-get install -y git curl
+
+           # Build the basic-auth header value git sends to the gh-proxy host.
+           # base64 wraps long output across lines; strip the newlines so a long
+           # token cannot end up split inside the header.
+           TOKEN=$(printf '%s' "x-access-token:${{ secrets.ADMIN_PTA }}" | base64 | tr -d '\n')
+           git config --global http.https://gh-proxy.test.osinfra.cn/.extraheader "AUTHORIZATION: basic $TOKEN"
+
+        - name: Install kubectl
+          run: |
+            install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
+
+            # get kubeconfig from secret
+            echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
+
+        - name: Checkout code
+          uses: actions/checkout@v4
+
+        - name: Prepare scripts
+          run: |
+            # prepare for lws entrypoint scripts
+            install -D tests/e2e/multi_node/scripts/run.sh /root/.cache/tests/run.sh
+
+        - name: Launch cluster
+          run: |
+            kubectl apply -f tests/e2e/multi_node/scripts/lws.yaml
+          
+        - name: Waiting for pod ready
+          run: |
+            echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
+
+            # Bound the wait so a pod that never becomes ready fails this step
+            # instead of polling until the job-level timeout. The default can be
+            # overridden by exporting POD_READY_TIMEOUT (seconds).
+            TIMEOUT_SECONDS="${POD_READY_TIMEOUT:-3600}"
+            DEADLINE=$((SECONDS + TIMEOUT_SECONDS))
+
+            while true; do
+              # get pod status
+              READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}')
+
+              if [[ "$READY_STATUS" == "true" ]]; then
+                echo "✅ Pod [$LEADER_POD] is Ready!"
+                break
+              fi
+
+              if (( SECONDS >= DEADLINE )); then
+                echo "❌ Pod [$LEADER_POD] not ready after ${TIMEOUT_SECONDS}s, giving up."
+                # Best-effort diagnostics before failing the step.
+                kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
+                exit 1
+              fi
+
+              echo "Pod [$LEADER_POD] not ready, waiting..."
+              sleep 3
+            done
+
+        - name: Stream logs and monitor pod health
+          run: |
+            set -euo pipefail
+
+            # The monitor below runs as a background job, so its `exit 1` only
+            # terminates that job. It records failure in this marker file so the
+            # foreground shell can fail the step.
+            FAIL_MARKER="$(mktemp -d)/pod_failed"
+
+            echo "🚀 Start streaming logs for Pod [$LEADER_POD] ..."
+            kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" &
+            LOG_PID=$!
+
+            echo "Start monitoring Pod [$LEADER_POD] status ..."
+            while true; do
+              STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}')
+              if [[ "$STATUS" != "Running" && "$STATUS" != "Succeeded" ]]; then
+                # Record the failure first so it is visible to the foreground
+                # shell no matter when the log stream ends.
+                touch "$FAIL_MARKER"
+                echo "❌ Pod [$LEADER_POD] exited abnormally with status: $STATUS"
+                kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
+                kubectl logs "$LEADER_POD" -n "$NAMESPACE" --previous --all-containers || true
+                # Stop the log stream so the foreground `wait` returns.
+                kill $LOG_PID || true
+                exit 1
+              fi
+              sleep 5
+            done &
+
+            MONITOR_PID=$!
+            # Block until the log stream ends (on its own or killed by the monitor).
+            wait $LOG_PID || true
+            kill $MONITOR_PID || true
+
+            # Propagate a failure detected by the background monitor.
+            if [[ -e "$FAIL_MARKER" ]]; then
+              echo "Pod [$LEADER_POD] was reported unhealthy; failing the step."
+              exit 1
+            fi
+
+        - name: Generate summary
+          if: always()
+          run: |
+            # Append the test summary to the job summary when the file exists;
+            # otherwise leave a short placeholder note.
+            SUMMARY_FILE="/root/.cache/test_summary.md"
+            if [ ! -f "$SUMMARY_FILE" ]; then
+              echo "No summary file found." >> "$GITHUB_STEP_SUMMARY"
+            else
+              cat "$SUMMARY_FILE" >> "$GITHUB_STEP_SUMMARY"
+            fi
+
+        - name: Post process
+          if: always()
+          run: |
+            # Listing pods is diagnostic only. The default shell runs with -e, so
+            # a failure here must not prevent the cleanup below from running.
+            kubectl get pods -n "$NAMESPACE" || true
+            # Tear down the cluster resources; tolerate ones that were never
+            # created or are already gone.
+            kubectl delete -f tests/e2e/multi_node/scripts/lws.yaml --ignore-not-found
--- a/.github.backup/workflows/nightly_benchmarks.yaml
+++ b/.github.backup/workflows/nightly_benchmarks.yaml
@@ -0,0 +1,206 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: 'ascend test / performance'
+# This workflow runs nightly benchmarks for vllm-ascend.
+
+on:
+  schedule:
+    # Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8)
+    - cron: "0 12 * * *"
+    - cron: "0 19 * * *"
+
+  workflow_dispatch:
+    # Allow manual triggering of the workflow
+
+  pull_request:
+    types: [ labeled ]
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# only 1 job can run on static-8-01-cards at a time
+concurrency:
+  group: static-8-01-cards
+  cancel-in-progress: false
+
+jobs:
+  test:
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+
+    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
+    runs-on: 'linux-arm64-npu-static-8'
+    strategy:
+      matrix:
+        include:
+          - vllm_branch: v0.11.0
+            vllm_ascend_branch: main
+            vllm_use_v1: 1
+      max-parallel: 1
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      volumes:
+        - /usr/local/dcmi:/usr/local/dcmi
+        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
+        - /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
+        # Use self-host cache speed up pip and model download
+        - /home/action/.cache:/github/home/.cache/
+      options: >-
+        --device /dev/davinci0
+        --device /dev/davinci1
+        --device /dev/davinci_manager
+        --device /dev/devmm_svm
+        --device /dev/hisi_hdc
+      env:
+        VLLM_USE_MODELSCOPE: True
+        ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
+        ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
+        VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          # keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt-get update -y
+          apt-get -y install git jq wget curl lsof gcc g++ cmake libnuma-dev
+
+      - name: Config git
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: ./vllm-empty
+          ref: ${{  matrix.vllm_branch }}
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -e .
+          pip install -r benchmarks/requirements-bench.txt
+
+      - name: Run current commit benchmarks
+        if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
+        run: |
+          # Sometimes we only want to run benchmarks on the current commit
+          # This is useful for debugging or a release benchmark
+          bash benchmarks/scripts/run-performance-benchmarks.sh
+          # Convert the benchmark results to markdown format
+          python3 benchmarks/scripts/convert_json_to_markdown.py
+
+      - name: Generate step summary
+        if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
+        run: |
+          cat ./benchmarks/results/benchmark_results.md >> $GITHUB_STEP_SUMMARY
+
+      - name: Upload benchmark artifacts
+        if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
+        uses: actions/upload-artifact@v4
+        with:
+          name: "benchmark-performance-${{ matrix.vllm_branch }}-${{ matrix.vllm_ascend_branch }}-report"
+          path: ./benchmarks/results/benchmark_results.md
+          if-no-files-found: warn
+          retention-days: 90
+          overwrite: true
+
+      - name: Install elastic_tool
+        if: github.event_name != 'pull_request'
+        run: |
+          pip install escli-tool==0.2.3
+
+      - name: Collect pr info from vllm-project/vllm-ascend
+        if: github.event_name != 'pull_request'
+        run: |
+          # Write "<hash> <subject>" for each commit that touches Python files
+          # outside docs/, tests/, examples/ and benchmarks/, i.e. the commits
+          # that may influence performance.
+          git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' ':!benchmarks/*' > commit_log.txt
+          # NOTE(review): presumably trims commit_log.txt to commits that still
+          # need benchmarking - confirm against the escli-tool documentation.
+          escli check commit_log.txt
+      
+      - name: Prepare benchmark script in advance
+        if: github.event_name != 'pull_request'
+        # The benchmark iteration checks out each commit in turn, which changes the
+        # benchmark scripts in the working tree. Keep a copy outside the checkout so
+        # the scripts are always available.
+        run: |
+          # Copy the benchmark scripts to /github/home/benchmarks
+          mkdir -p /github/home/benchmarks
+          cp -r benchmarks/* /github/home/benchmarks/
+
+      - name: Run benchmark iteration
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        if: github.event_name != 'pull_request'
+        run: |
+          # Benchmark every commit listed in commit_log.txt ("<hash> <subject>"
+          # per line) and upload each result with escli.
+          while IFS= read -r line || [[ -n "$line" ]]; do
+            # Split at the first space: hash before it, subject after it.
+            commit_id=${line%% *}
+            commit_title=${line#* }
+
+            git checkout "$commit_id"
+            # Commit date in strict ISO 8601 format; keeping the first 19
+            # characters drops the timezone offset.
+            commit_time=$(git show -s --format=%cd --date=iso-strict "$commit_id")
+            commit_time_no_tz=${commit_time::19}
+            pip install -e .
+
+            echo "------------------------"
+            echo "commit_id: $commit_id"
+            echo "commit_title: $commit_title"
+            echo "commit_time: $commit_time_no_tz"
+            echo "vllm branch: ${{ matrix.vllm_branch }}"
+            echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
+            echo "------------------------"
+
+            # Run from /github/home so the benchmark scripts under
+            # /github/home/benchmarks are used rather than the checked-out ones.
+            cd /github/home
+            ERROR_MSG=""
+            if ! bash benchmarks/scripts/run-performance-benchmarks.sh; then
+              ERROR_MSG="Benchmark failed to run"
+            fi
+            # send the result to es
+            escli add --vllm_branch ${{ matrix.vllm_branch }} \
+            --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
+            --commit_id "$commit_id" \
+            --commit_title "$commit_title" \
+            --created_at "$commit_time_no_tz" \
+            --res_dir ./benchmarks/results \
+            --error "$ERROR_MSG" \
+            --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
+            # Clear the results so the next commit starts from an empty directory.
+            rm -rf ./benchmarks/results
+            cd -
+          done < commit_log.txt
--- a/.github.backup/workflows/pre-commit.yml
+++ b/.github.backup/workflows/pre-commit.yml
@@ -0,0 +1,43 @@
+name: pre-commit
+
+# Reusable workflow: the caller supplies the vLLM ref to install before linting.
+on:
+  workflow_call:
+    inputs:
+      vllm:
+        required: true
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+        with:
+          python-version: "3.11"
+      # Register the actionlint and mypy problem matchers.
+      - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+      - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
+      # vLLM is checked out next to the project sources, at the ref chosen by the caller.
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+      - name: Install vllm
+        working-directory: vllm-empty
+        run: |
+          pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          VLLM_TARGET_DEVICE=empty pip install .
+      - name: Install vllm-ascend dev
+        run: |
+          pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
+      # Run every hook in the "manual" stage against all files.
+      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        env:
+          # Exclude SC2046, SC2006, SC2086 for actionlint
+          SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086"
+        with:
+          extra_args: --all-files --hook-stage manual
+
--- a/.github.backup/workflows/release_code.yml
+++ b/.github.backup/workflows/release_code.yml
@@ -0,0 +1,75 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: build / sdist
+
+# Triggers:
+#   - pull requests targeting main or a *-dev branch that touch one of the
+#     packaging-related paths listed below
+#   - pushes of tags whose name starts with "v"
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/release_code.yml'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  build:
+    name: release code
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version:
+          - "3.11"
+    steps:
+      - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
+
+      # Dump the runner's CPU information into the job log.
+      - name: Print
+        run: lscpu
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # Tooling used to build and upload the source distribution.
+      - name: Install dependencies
+        run: python3 -m pip install twine setuptools_scm
+
+      # Build the sdist into dist/ and list what was produced.
+      - name: Generate tar.gz
+        run: |
+          python3 setup.py sdist
+          ls dist
+
+      # Keep the build output as a workflow artifact.
+      - name: Archive tar.gz
+        uses: actions/upload-artifact@v4
+        with:
+          path: dist/*
+          name: vllm-ascend-src
+
+      # Upload the sdist to PyPI; only runs for tag refs.
+      - name: Release
+        if: startsWith(github.ref, 'refs/tags/')
+        env:
+          # twine reads its credentials from these variables, which keeps the
+          # token out of the rendered script and the process argument list.
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        run: |
+          python3 -m twine upload dist/*
--- a/.github.backup/workflows/release_whl.yml
+++ b/.github.backup/workflows/release_whl.yml
@@ -0,0 +1,125 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: build / wheel
+
+# Triggers:
+#   - a daily schedule
+#   - pull requests targeting main or a *-dev branch that touch one of the
+#     build-related paths listed below
+#   - pushes of tags whose name starts with "v"
+on:
+  schedule:
+    # Runs at 23:00 UTC (7:00 AM Beijing) every day
+    - cron: '0 23 * * *'
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '.github/workflows/release_whl.yml'
+      - '.github/Dockerfile.buildwheel'
+      - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  # Builds one wheel per (runner OS, Python version) combination.
+  build:
+    name: build and release wheel
+    strategy:
+      matrix:
+        os: [ubuntu-24.04, ubuntu-24.04-arm]
+        # Pull requests build only Python 3.11; every other event builds
+        # 3.9, 3.10 and 3.11. The `cond && a || b` form acts as a ternary.
+        python-version: ${{ fromJSON(
+          (github.event_name == 'pull_request' && '["3.11"]') ||
+          '["3.9", "3.10", "3.11"]'
+         ) }}
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
+
+    # Log the runner's CPU details.
+    - name: Print
+      run: |
+        lscpu
+
+    # NOTE(review): presumably removes the runner tool cache while keeping
+    # Docker images; confirm against the action's documentation.
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
+
+    # Build the image from .github/Dockerfile.buildwheel for the matrix Python
+    # version, then run it as the current uid:gid with the working directory
+    # mounted at /outpwd and copy /workspace/vllm-ascend/dist out of it.
+    - name: Build wheel
+      run: |
+        ls
+        docker build -f ./.github/Dockerfile.buildwheel \
+        --build-arg PY_VERSION=${{ matrix.python-version }} \
+        -t wheel:v1 .
+        docker run --rm \
+        -u $(id -u):$(id -g) \
+        -v $(pwd):/outpwd \
+        wheel:v1 \
+        bash -c "cp -r /workspace/vllm-ascend/dist /outpwd"
+        ls dist
+    # Only tag builds set up the matrix Python version.
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+      if: startsWith(github.ref, 'refs/tags/')
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    # Repair every built wheel with auditwheel and replace the originals in dist/.
+    - name: Repair wheels with auditwheel
+      run: |
+        python3 -m pip install auditwheel
+        python3 -m pip install patchelf
+
+        # Libraries passed to auditwheel via --exclude.
+        excluded_libs=(
+          libplatform.so
+          libregister.so
+          libge_common_base.so
+          libc10.so
+          libc_sec.so
+          "libascend*.so"
+          "libtorch*.so"
+          "liberror_manager.so"
+        )
+        exclude_flags=()
+        for lib in "${excluded_libs[@]}"; do
+          exclude_flags+=(--exclude "$lib")
+        done
+
+        mkdir -p dist/repaired
+        for built_wheel in dist/*.whl; do
+          auditwheel repair "$built_wheel" -w dist/repaired/ "${exclude_flags[@]}"
+        done
+
+        # Swap the original wheels for the repaired ones.
+        rm -f dist/*.whl
+        mv dist/repaired/*.whl dist/
+        rmdir dist/repaired
+        ls dist
+
+    # Print what auditwheel reports for each repaired wheel.
+    - name: Verify automatic platform tags
+      run: |
+        cd dist
+        for repaired_wheel in *.whl; do
+          echo "verification file: $repaired_wheel"
+          auditwheel show "$repaired_wheel"
+        done
+
+    # One artifact per OS / Python version combination.
+    - name: Archive wheel
+      uses: actions/upload-artifact@v4
+      with:
+        path: dist/*
+        name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
+
+    # Upload the repaired wheels to PyPI; only runs for tag refs.
+    - name: Release
+      if: startsWith(github.ref, 'refs/tags/')
+      env:
+        # twine reads its credentials from these variables, which keeps the
+        # token out of the rendered script and the process argument list.
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+      run: |
+        python3 -m pip install twine
+        python3 -m twine upload --verbose dist/*
--- a/.github.backup/workflows/reminder_comment.yml
+++ b/.github.backup/workflows/reminder_comment.yml
@@ -0,0 +1,26 @@
+# Posts a one-time reminder comment on every newly opened pull request.
+name: PR Reminder Comment Bot
+permissions:
+  pull-requests: write
+on:
+  pull_request_target:
+    types: [opened]
+jobs:
+  pr_reminder:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Remind to run full CI on PR
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          script: |
+            // Await the request so that a failed API call fails this step.
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: '👋 Hi! Thank you for contributing to the vLLM Ascend project. The following points will speed up your PR merge:\n\n' +
+                '- A PR should do only one thing, smaller PRs enable faster reviews.\n' +
+                '- Every PR should include unit tests and end-to-end tests to ensure it works and is not broken by other future PRs.\n' +
+                '- Write the commit message by fulfilling the PR description to help reviewer and future developers understand.\n\n' +
+                'If CI fails, you can run linting and testing checks locally according [Contributing](https://vllm-ascend.readthedocs.io/zh-cn/latest/developer_guide/contribution/index.html) and [Testing](https://vllm-ascend.readthedocs.io/zh-cn/latest/developer_guide/contribution/testing.html).'
+            })
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github.backup/workflows/vllm_ascend_dist.yaml
+++ b/.github.backup/workflows/vllm_ascend_dist.yaml
@@ -0,0 +1,100 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: 'e2e test / a3-test'
+
+on:
+  workflow_call:
+
+  pull_request:
+    types:
+      - labeled
+
+# Run steps use a login shell (bash -el) because plain bash does not read
+# ~/.profile or ~/.bashrc; the login shell is what activates the
+# ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# A newer run of this workflow on the same ref cancels the one in progress.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  e2e:
+    # Runs when the pull request carries both the 'dist-test' and
+    # 'ready-for-test' labels, or when the triggering event is workflow_dispatch.
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'dist-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'workflow_dispatch' }}
+    strategy:
+      matrix:
+        os: [linux-aarch64-a3-8]
+        vllm_version: [v0.11.0]
+    name: vLLM Ascend test
+    runs-on: ${{ matrix.os }}
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
+      env:
+        DEBIAN_FRONTEND: noninteractive
+    steps:
+      # Print the NPU status and the installed CANN toolkit version.
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      # Point apt, pip and git at mirrors/proxies; git is installed here because
+      # the checkout steps below need it.
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          apt-get update -y
+          apt install git -y
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      # packages.txt lists the apt packages the project needs.
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
+          # Resolve the toolkit directory for the runner's architecture, the same
+          # way the "Check npu and CANN info" step does, instead of assuming x86_64.
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/devlib
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test for V1 Engine
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        run: |
+          # TODO: enable more tests
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
--- a/.github.backup/workflows/vllm_ascend_doctest.yaml
+++ b/.github.backup/workflows/vllm_ascend_doctest.yaml
@@ -0,0 +1,87 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: 'ascend test / doctest'
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    # Changes to the doctests or to this workflow are validated on the PR itself.
+    paths:
+      - '.github/workflows/vllm_ascend_doctest.yaml'
+      - 'tests/e2e/doctests/**'
+      - 'tests/e2e/common.sh'
+      - 'tests/e2e/run_doctests.sh'
+  schedule:
+    # At minute 0 of every twelfth hour
+    - cron: '0 */12 * * *'
+
+# Run steps use a login shell (bash -el) because plain bash does not read
+# ~/.profile or ~/.bashrc; the login shell is what activates the
+# ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  test:
+    strategy:
+      # Each version should be tested
+      fail-fast: false
+      matrix:
+        # Each entry is used as the tag of the vllm-ascend container image below.
+        vllm_version: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
+    name: vLLM Ascend test
+    runs-on: linux-aarch64-a2-1
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_version }}
+    steps:
+      # Log the hardware/toolkit state and the commits found in the image.
+      # The `|| true` keeps the step going if a directory is not a git checkout.
+      - name: Check NPU/CANN and git info
+        run: |
+          echo "====> Print NPU/CANN info"
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+          echo "====> Print vllm-ascend git info"
+          cd /vllm-workspace/vllm-ascend
+          git --no-pager log -1 || true
+          echo "====> Print vllm git info"
+          cd /vllm-workspace/vllm
+          git --no-pager log -1 || true
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      # Replace the tests and examples inside the image with the checked-out
+      # ones, then run the doctest script from /workspace.
+      - name: Run vllm-ascend/tests/e2e/run_doctests.sh
+        run: |
+          # PWD: /__w/vllm-ascend/vllm-ascend
+          # Make sure e2e tests are latest
+          echo "Replacing /vllm-workspace/vllm-ascend/tests/e2e ..."
+          rm -rf /vllm-workspace/vllm-ascend/tests/e2e
+          mkdir -p /vllm-workspace/vllm-ascend/tests
+          # Overwrite e2e and examples
+          cp -r tests/e2e /vllm-workspace/vllm-ascend/tests/
+          cp -r examples /vllm-workspace/vllm-ascend/
+
+          # Simulate container to enter directory
+          cd /workspace
+
+          # Run real test
+          echo "Test:"
+          /vllm-workspace/vllm-ascend/tests/e2e/run_doctests.sh
--- a/.github.backup/workflows/vllm_ascend_test.yaml
+++ b/.github.backup/workflows/vllm_ascend_test.yaml
@@ -0,0 +1,149 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: 'ascend test'
+
+# Triggers: pushes to main, and pull requests targeting main or a *-dev branch.
+on:
+  push:
+    branches:
+      - 'main'
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# A newer run of this workflow on the same ref cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Calls the reusable pre-commit workflow, passing vLLM v0.11.0 as its `vllm` input.
+  lint:
+    uses: ./.github/workflows/pre-commit.yml
+    with:
+      vllm: v0.11.0
+
+  # Classifies the changed files so that later jobs can be skipped when nothing
+  # relevant to them was touched.
+  changes:
+    runs-on: ubuntu-latest
+    outputs:
+      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
+      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          filters: |
+            e2e_tracker:
+              - '.github/workflows/vllm_ascend_test.yaml'
+              # The reusable workflow called by e2e-light must be tracked too,
+              # otherwise edits to it never trigger an e2e run.
+              - '.github/workflows/_e2e_test.yaml'
+              - 'vllm_ascend/**'
+              - 'csrc/**'
+              - 'cmake/**'
+              - 'tests/e2e/**'
+              - 'CMakeLists.txt'
+              - 'setup.py'
+              - 'requirements.txt'
+              - 'requirements-dev.txt'
+              - 'requirements-lint.txt'
+              - 'packages.txt'
+            ut_tracker:
+              - 'tests/ut/**'
+
+  ut:
+    name: unit test
+    needs:
+      - lint
+      - changes
+    # Gate: lint succeeded and the change touches e2e- or ut-tracked paths.
+    if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
+    runs-on: ubuntu-22.04-arm
+    strategy:
+      matrix:
+        vllm_version:
+          - v0.11.0
+    container:
+      image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Install packages
+        run: |
+          apt-get update -y
+          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: ./vllm-empty
+          ref: ${{ matrix.vllm_version }}
+
+      # Install vLLM for the "empty" target device, then remove triton.
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty python3 -m pip install .
+          python3 -m pip uninstall -y triton
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
+          python3 -m pip install -r requirements-dev.txt
+          python3 -m pip install -v .
+
+      # Run tests/ut with coverage, skipping the attention v1 test module.
+      - name: Run unit test
+        env:
+          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
+          pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut --ignore tests/ut/attention/test_attention_v1.py
+      # Coverage is uploaded only for pushes to main, i.e. after a merge.
+      - name: Upload coverage to Codecov
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        uses: codecov/codecov-action@v5
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+        with:
+          name: vllm-ascend
+          flags: unittests
+          verbose: true
+
+  e2e-light:
+    name: e2e-light
+    # Note (yikun): if CI resources are limited this job can be split into two chained jobs.
+    needs:
+      - lint
+      - changes
+    # Pull requests only: lint succeeded, e2e-tracked paths changed, and the PR
+    # does not carry the 'ready' label.
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
+    strategy:
+      matrix:
+        vllm_version:
+          - v0.11.0
+    uses: ./.github/workflows/_e2e_test.yaml
+    with:
+      type: light
+      vllm: ${{ matrix.vllm_version }}
+      runner: linux-aarch64-a2
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
--- a/.github.backup/workflows/vllm_ascend_test_310p.yaml
+++ b/.github.backup/workflows/vllm_ascend_test_310p.yaml
@@ -0,0 +1,117 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: 'e2e test / 310p-test'
+
+on:
+  push:
+    tags:
+      - 'v*'
+  schedule:
+    # At minute 0 of every sixth hour
+    - cron: '0 */6 * * *'
+  pull_request:
+    types:
+      - labeled
+
+# Run steps use a login shell (bash -el) because plain bash does not read
+# ~/.profile or ~/.bashrc; the login shell is what activates the
+# ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# A newer run of this workflow on the same ref cancels the one in progress.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  e2e:
+    # Runs when the pull request carries both the 'e2e-310p-test' and
+    # 'ready-for-test' labels, or for schedule and push events.
+    if: >-
+      ${{
+        (contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) &&
+        contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
+        github.event_name == 'schedule' || github.event_name == 'push'
+        }}
+    strategy:
+      max-parallel: 2
+      matrix:
+        os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
+        vllm_version: [v0.11.0]
+    name: 310p e2e test
+    runs-on: ${{ matrix.os }}
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      # Print the NPU status and the installed CANN toolkit version.
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      # Point apt and pip at the in-cluster cache; git is installed here because
+      # the checkout steps below need it.
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      # packages.txt lists the apt packages the project needs.
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
+          # Resolve the toolkit directory for the runner's architecture, the same
+          # way the "Check npu and CANN info" step does, instead of assuming x86_64.
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/devlib
+          export SOC_VERSION=ASCEND310P3
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      # The single-card runner runs the basic offline-inference test; the other
+      # runner runs the parallel variant.
+      - name: Run e2e test
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+        run: |
+          if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
+            pytest -sv tests/e2e/310p/test_offline_inference_310p.py
+          else
+            pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
+          fi
--- a/.github.backup/workflows/vllm_ascend_test_full.yaml
+++ b/.github.backup/workflows/vllm_ascend_test_full.yaml
@@ -0,0 +1,80 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+name: 'ascend test / full'
+
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    types:
+      - labeled
+      - synchronize
+
+# Run steps use a login shell (bash -el) because plain bash does not read
+# ~/.profile or ~/.bashrc; the login shell is what activates the
+# ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# A newer run of this workflow on the same ref cancels the one in progress.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  # Classifies the changed files; runs only when the pull request carries both
+  # the 'ready' and 'ready-for-test' labels.
+  changes:
+    runs-on: ubuntu-latest
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
+    outputs:
+      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
+      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          filters: |
+            e2e_tracker:
+              - '.github/workflows/vllm_ascend_test.yaml'
+              # Track this workflow file as well, so that a change to it is
+              # exercised by its own e2e job.
+              - '.github/workflows/vllm_ascend_test_full.yaml'
+              - '.github/workflows/_e2e_test.yaml'
+              - 'vllm_ascend/**'
+              - 'csrc/**'
+              - 'cmake/**'
+              - 'tests/e2e/**'
+              - 'CMakeLists.txt'
+              - 'setup.py'
+              - 'requirements.txt'
+              - 'requirements-dev.txt'
+              - 'requirements-lint.txt'
+              - 'packages.txt'
+            ut_tracker:
+              - 'tests/ut/**'
+
+  e2e-test:
+    name: e2e-full
+    needs:
+      - changes
+    # Skip unless the change touches e2e-tracked paths.
+    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
+    strategy:
+      matrix:
+        vllm_version:
+          - v0.11.0
+    uses: ./.github/workflows/_e2e_test.yaml
+    with:
+      type: full
+      vllm: ${{ matrix.vllm_version }}
+      runner: linux-aarch64-a2
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
--- a/.github.backup/workflows/vllm_ascend_test_full_vllm_main.yaml
+++ b/.github.backup/workflows/vllm_ascend_test_full_vllm_main.yaml
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+name: 'ascend test / vllm main'
+
+# Triggers: a schedule at minute 0 of every second hour, plus manual dispatch.
+on:
+  # Run 1-card and 2-cards e2e tests per 2h
+  schedule:
+    - cron: '0 */2 * * *'
+  workflow_dispatch:
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# A newer run of this workflow on the same ref cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Calls the reusable e2e workflow with type "full" against the vLLM main branch.
+  e2e-test:
+    uses: ./.github/workflows/_e2e_test.yaml
+    with:
+      vllm: main
+      runner: linux-aarch64-a2
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      type: full
--- a/.github.backup/workflows/vllm_ascend_test_models.yaml
+++ b/.github.backup/workflows/vllm_ascend_test_models.yaml
@@ -0,0 +1,177 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+# This workflow is triggered by:
+# 1. schedule (every 6 hours)
+# 2. pull_request that changes the related files
+# 3. workflow_dispatch with the vllm-ascend-version input
+
+name: 'ascend test / models'
+
+on:
+  workflow_dispatch:
+    inputs:
+      vllm-ascend-version:
+        type: choice
+        description: "vllm-ascend:"
+        required: true
+        default: main
+        # Values offered in the manual-dispatch form
+        options:
+          - latest
+          - main
+  pull_request:
+    paths:
+      - ".github/workflows/vllm_ascend_test_models.yaml"
+      - "tests/e2e/models/test_lm_eval_correctness.py"
+    branches:
+      - main
+      - "*-dev"
+  schedule:
+    # At minute 0 of every 6th hour
+    - cron: "0 */6 * * *"
+
+# Every `run` step uses a login shell ("bash -el {0}"): a plain bash shell does
+# not read ~/.profile or ~/.bashrc, so steps would otherwise not be activated.
+# The login shell is what activates the ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: 'bash -el {0}'
+
+# A newer run of the same workflow + ref cancels the one still in progress
+concurrency:
+  cancel-in-progress: true
+  group: "${{ github.workflow }}-${{ github.ref }}"
+
+jobs:
+  run:  # one call of the reusable accuracy workflow per matrix entry
+    uses: ./.github/workflows/_accuracy_test.yaml
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - runner: a2-1
+            model_name: Qwen3-8B
+          - runner: a2-1
+            model_name: Qwen2.5-VL-7B-Instruct
+          - runner: a2-1
+            model_name: Qwen2-Audio-7B-Instruct
+          - runner: a2-2
+            model_name: Qwen3-30B-A3B
+          - runner: a2-2
+            model_name: Qwen3-VL-30B-A3B-Instruct
+          - runner: a2-2
+            model_name: DeepSeek-V2-Lite
+    with:
+      model_name: ${{ matrix.model_name }}
+      runner: linux-aarch64-${{ matrix.runner }}
+      vllm: v0.11.0
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
+
+  create_pr:  # gathers this run's `report-*` artifacts and opens a docs PR upstream
+    runs-on: ubuntu-latest
+    needs: run
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
+    env:
+      UPSTREAM_REPO: vllm-project/vllm-ascend
+    steps:
+      - name: Checkout repository  # the fork that will host the PR branch
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-ascend-ci/vllm-ascend
+          token: ${{ secrets.PAT_TOKEN }}
+          ref: main
+
+      - name: Add upstream remote
+        run: |
+          git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
+          git fetch upstream
+          git remote -v
+
+      - name: Set Git user info dynamically  # commits are attributed to the triggering actor
+        run: |
+          git config user.name "${{ github.actor }}"
+          git config user.email "${{ github.actor }}@users.noreply.github.com"
+
+      - name: Create or switch to branch
+        run: |
+          TIMESTAMP=$(date +%Y%m%d%H%M%S)
+          BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
+          echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV  # exposed to later steps as env.BRANCH_NAME
+          git checkout -B "${BRANCH_NAME}" upstream/main
+
+      - name: Download only current run reports
+        uses: actions/download-artifact@v5
+        with:
+          path: ./docs/source/developer_guide/evaluation/accuracy_report
+          pattern: report-*
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          run-id: ${{ github.run_id }}
+
+      - name: Delete old report  # drop old top-level reports, then move the nested *.md files up
+        run: |
+          find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
+          find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
+          find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
+
+      - name: Update accuracy_report/index.md  # regenerate the toctree from the *.md files present
+        run: |
+          REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
+          INDEX_MD="$REPORT_DIR/index.md"
+          {
+            echo "# Accuracy Report"
+            echo ""
+            echo ":::{toctree}"
+            echo ":caption: Accuracy Report"
+            echo ":maxdepth: 1"
+
+            for report in "$REPORT_DIR"/*.md; do
+              filename="$(basename "$report" .md)"
+              if [ "$filename" != "index" ]; then
+                echo "$filename"
+              fi
+            done
+            echo ":::"
+          } > "$INDEX_MD"
+
+      - name: push accuracy report
+        env:
+          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
+        run: |
+          git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
+          git commit -s -m "[Doc] Update accuracy reports for ${{ env.BRANCH_NAME }}"
+          git push -f origin "${{ env.BRANCH_NAME }}"
+
+      - name: Create PR in upstream via API
+        uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.PAT_TOKEN }}
+          script: |
+            const pr = await github.rest.pulls.create({
+              owner: 'vllm-project',
+              repo: 'vllm-ascend',
+              head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
+              base: 'main',
+              title: `[Doc] Update accuracy reports for ${{ env.BRANCH_NAME }}`,
+              body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models
+
+              - [Workflow run][1]
+
+              [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
+            });
+            core.info(`Created PR #${pr.data.number}`);
--- a/.github.backup/workflows/vllm_ascend_test_pd.yaml
+++ b/.github.backup/workflows/vllm_ascend_test_pd.yaml
@@ -0,0 +1,112 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+name: "e2e test / pd-disaggregation"
+
+on:
+  pull_request:
+    types: [labeled]
+  schedule:
+    # Daily at 23:00 UTC, i.e. 7:00 AM Beijing time
+    - cron: "0 23 * * *"
+
+# Every `run` step uses a login shell ("bash -el {0}"): a plain bash shell does
+# not read ~/.profile or ~/.bashrc, so steps would otherwise not be activated.
+# The login shell is what activates the ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: 'bash -el {0}'
+
+# Only 1 job can run on static-8-01-cards at a time; in-progress runs are never cancelled
+concurrency:
+  cancel-in-progress: false
+  group: 'static-8-01-cards'
+
+jobs:
+  prefilling-decoding-disaggregation:
+    # Runs on schedule events, or on PRs labeled with both 'pd-test' and 'ready-for-test'
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
+    strategy:
+      matrix:
+        # vLLM refs checked out by the "Checkout vllm-project/vllm repo" step
+        vllm_version:
+          - main
+          - v0.9.1
+    name: vLLM Ascend prefilling decoding disaggregation test
+    runs-on: linux-arm64-npu-static-8
+
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      volumes:
+        - /usr/local/dcmi:/usr/local/dcmi
+        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
+        - /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
+        # Use the self-hosted cache to speed up pip and model downloads
+        - /home/action/.cache:/github/home/.cache/
+      options: >-
+        --device /dev/davinci0
+        --device /dev/davinci1
+        --device /dev/davinci_manager
+        --device /dev/devmm_svm
+        --device /dev/hisi_hdc
+      env:
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          # keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          apt-get update -y
+          apt-get install -y git
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install $(cat packages.txt)
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
+        run: |
+          git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend  # key and value are separate arguments
+          bash tests/e2e/pd_disaggreate/run_edge_case_test.sh