diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 9a73dcdc..5f084491 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -88,6 +88,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
 
       - name: Run vllm-project/vllm-ascend test
@@ -195,6 +196,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
       - name: Run e2e test
         env:
@@ -299,6 +301,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
       - name: Run vllm-project/vllm-ascend test (light)
         env:
@@ -403,6 +406,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
       - name: Run vllm-project/vllm-ascend test (full)
         env:
@@ -526,6 +530,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
 
       - name: Run vllm-project/vllm-ascend test for V1 Engine
@@ -623,6 +628,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
 
       - name: Run vllm-project/vllm-ascend test
@@ -699,6 +705,7 @@ jobs:
           pip install uc-manager
           uv pip install -r requirements-dev.txt
           uv pip install -v -e .
+          uv pip install https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/torch_npu-2.9.0.post1%2Bgitee7ba04-cp311-cp311-manylinux_2_28_aarch64.whl
           uv pip install git+https://github.com/modelscope/modelscope.git@dbbcbf631fe6d10cc6446df2ad2fef24039fe7fe
 
       - name: Run vllm-project/vllm-ascend test
diff --git a/.github/workflows/schedule_image_build_and_push.yaml b/.github/workflows/schedule_image_build_and_push.yaml
index 94e77728..61e55344 100644
--- a/.github/workflows/schedule_image_build_and_push.yaml
+++ b/.github/workflows/schedule_image_build_and_push.yaml
@@ -11,6 +11,13 @@
 #   - Publish when tag with v* (pep440 version)  ===>  vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1
 name: Image Build and Push
 on:
+  pull_request:
+    branches:
+      - 'releases/*'
+    paths: 
+      - 'Dockerfile*'
+      - '.github/workflows/schedule_image_build_and_push.yaml'
+    types: [ labeled, synchronize ]
   workflow_dispatch:
     inputs:
       tag:
@@ -33,6 +40,7 @@ concurrency:
 jobs:
   image_build:
     name: Image Build and Push
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'image-build')
     strategy:
       matrix:
         build_meta:
@@ -59,7 +67,7 @@ jobs:
       dockerfile: ${{ matrix.build_meta.dockerfile }}
       suffix: ${{ matrix.build_meta.suffix }}
       quay_username: ${{ vars.QUAY_USERNAME }}
-      should_push: ${{ github.repository_owner == 'vllm-project'}}
+      should_push: ${{ github.repository_owner == 'vllm-project' && github.event_name != 'pull_request' }}
       workflow_dispatch_tag: ${{ inputs.tag }}
     secrets:
       QUAY_PASSWORD: ${{ secrets.QUAY_PASSWORD }}
diff --git a/Dockerfile b/Dockerfile
index ff0bcdd5..0afde8f8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,13 +63,25 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
+    \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+    \
     if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
+    \
     PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
-    ARCH=$(python3 -c "import platform; machine = platform.machine().lower(); arch_map = {'x86_64': 'x86_64', 'amd64': 'x86_64', 'aarch64': 'aarch64', 'arm64': 'aarch64'}; print(arch_map.get(machine, machine))") && \
+    ARCH=$(python3 -c "import platform; m=platform.machine().lower(); map={'x86_64':'x86_64','amd64':'x86_64','aarch64':'aarch64','arm64':'aarch64'}; print(map.get(m,m))") && \
+    \
+    if [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgit4c901a4-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitdc51c2d-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgita74051c-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitee7ba04-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    else echo "Unsupported PYTHON_TAG=$PYTHON_TAG ARCH=$ARCH"; exit 1; fi && \
+    \
     TRITON_ASCEND_WHEEL="triton_ascend-3.2.0.dev20260322-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_27_${ARCH}.manylinux_2_28_${ARCH}.whl" && \
+    \
     python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${TRITON_ASCEND_WHEEL}" && \
+    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
     python3 -m pip cache purge
 
 # Install clang-15 (for triton-ascend)
diff --git a/Dockerfile.310p b/Dockerfile.310p
index 8e2fd809..a275c798 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -56,6 +56,16 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
+    PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
+    ARCH=$(python3 -c "import platform; m=platform.machine().lower(); map={'x86_64':'x86_64','amd64':'x86_64','aarch64':'aarch64','arm64':'aarch64'}; print(map.get(m,m))") && \
+    \
+    if [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgit4c901a4-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitdc51c2d-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgita74051c-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitee7ba04-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    else echo "Unsupported PYTHON_TAG=$PYTHON_TAG ARCH=$ARCH"; exit 1; fi && \
+    \
+    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
     python3 -m pip cache purge
 
 # Install modelscope (for fast download) and ray (for multinode)
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 2e404715..0c95f133 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -52,6 +52,16 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
+    PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
+    ARCH=$(python3 -c "import platform; m=platform.machine().lower(); map={'x86_64':'x86_64','amd64':'x86_64','aarch64':'aarch64','arm64':'aarch64'}; print(map.get(m,m))") && \
+    \
+    if [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgit4c901a4-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitdc51c2d-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgita74051c-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitee7ba04-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    else echo "Unsupported PYTHON_TAG=$PYTHON_TAG ARCH=$ARCH"; exit 1; fi && \
+    \
+    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
     python3 -m pip cache purge
 
 # Install modelscope (for fast download) and ray (for multinode)
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index 550e88ab..2c31d878 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -62,13 +62,25 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
+    \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+    \
     if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
+    \
     PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
-    ARCH=$(python3 -c "import platform; machine = platform.machine().lower(); arch_map = {'x86_64': 'x86_64', 'amd64': 'x86_64', 'aarch64': 'aarch64', 'arm64': 'aarch64'}; print(arch_map.get(machine, machine))") && \
+    ARCH=$(python3 -c "import platform; m=platform.machine().lower(); map={'x86_64':'x86_64','amd64':'x86_64','aarch64':'aarch64','arm64':'aarch64'}; print(map.get(m,m))") && \
+    \
+    if [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgit4c901a4-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitdc51c2d-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgita74051c-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitee7ba04-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    else echo "Unsupported PYTHON_TAG=$PYTHON_TAG ARCH=$ARCH"; exit 1; fi && \
+    \
     TRITON_ASCEND_WHEEL="triton_ascend-3.2.0.dev20260322-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_27_${ARCH}.manylinux_2_28_${ARCH}.whl" && \
+    \
     python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${TRITON_ASCEND_WHEEL}" && \
+    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
     python3 -m pip cache purge
 
 # Install clang-15 (for triton-ascend)
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index 10db5b44..4ed461a4 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -62,14 +62,26 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[a
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
-    export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
+    export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/$(uname -i)-openEuler-linux && \
+    \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+    \
     if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
+    \
     PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
-    ARCH=$(python3 -c "import platform; machine = platform.machine().lower(); arch_map = {'x86_64': 'x86_64', 'amd64': 'x86_64', 'aarch64': 'aarch64', 'arm64': 'aarch64'}; print(arch_map.get(machine, machine))") && \
+    ARCH=$(python3 -c "import platform; m=platform.machine().lower(); map={'x86_64':'x86_64','amd64':'x86_64','aarch64':'aarch64','arm64':'aarch64'}; print(map.get(m,m))") && \
+    \
+    if [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgit4c901a4-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitdc51c2d-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgita74051c-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitee7ba04-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    else echo "Unsupported PYTHON_TAG=$PYTHON_TAG ARCH=$ARCH"; exit 1; fi && \
+    \
     TRITON_ASCEND_WHEEL="triton_ascend-3.2.0.dev20260322-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_27_${ARCH}.manylinux_2_28_${ARCH}.whl" && \
+    \
     python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${TRITON_ASCEND_WHEEL}" && \
+    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
     python3 -m pip cache purge
 
 # Install clang (for triton-ascend)
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index d6fb4b7e..4bb86318 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -62,14 +62,26 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[a
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
-    export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
+    export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/$(uname -i)-openEuler-linux && \
+    \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+    \
     if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
+    \
     PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
-    ARCH=$(python3 -c "import platform; machine = platform.machine().lower(); arch_map = {'x86_64': 'x86_64', 'amd64': 'x86_64', 'aarch64': 'aarch64', 'arm64': 'aarch64'}; print(arch_map.get(machine, machine))") && \
+    ARCH=$(python3 -c "import platform; m=platform.machine().lower(); map={'x86_64':'x86_64','amd64':'x86_64','aarch64':'aarch64','arm64':'aarch64'}; print(map.get(m,m))") && \
+    \
+    if [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgit4c901a4-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitdc51c2d-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp310" ] && [ "$ARCH" = "x86_64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgita74051c-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    elif [ "$PYTHON_TAG" = "cp311" ] && [ "$ARCH" = "aarch64" ]; then PTA_WHEEL="torch_npu-2.9.0.post1%2Bgitee7ba04-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_28_${ARCH}.whl"; \
+    else echo "Unsupported PYTHON_TAG=$PYTHON_TAG ARCH=$ARCH"; exit 1; fi && \
+    \
     TRITON_ASCEND_WHEEL="triton_ascend-3.2.0.dev20260322-${PYTHON_TAG}-${PYTHON_TAG}-manylinux_2_27_${ARCH}.manylinux_2_28_${ARCH}.whl" && \
+    \
     python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${TRITON_ASCEND_WHEEL}" && \
+    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
     python3 -m pip cache purge
 
 # Install clang (for triton-ascend)