[CI] Add Triton Ascend in CI (#4921)

Add triton-ascend to UT and e2e

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
meihanc
2025-12-23 12:47:35 +08:00
committed by GitHub
parent 2e010e12dd
commit 592cfb6a6f
8 changed files with 85 additions and 36 deletions

View File

@@ -68,10 +68,23 @@ jobs:
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test
- name: Run vllm-project/vllm-ascend test (non triton)
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
if: ${{ inputs.type == 'full' }}
run: |
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
- name: Install Ascend toolkit & triton_ascend
shell: bash -l {0}
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
- name: Run vllm-project/vllm-ascend test
env:
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
if: ${{ inputs.type == 'light' }}
run: |
@@ -83,7 +96,6 @@ jobs:
- name: Run e2e test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
if: ${{ inputs.type == 'full' }}
run: |
@@ -92,9 +104,7 @@ jobs:
pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_async_scheduling.py
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
# torch 2.8 doesn't work with lora, fix me
#pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
@@ -165,10 +175,20 @@ jobs:
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test (non triton)
if: ${{ inputs.type == 'full' }}
run: |
pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
- name: Install Ascend toolkit & triton_ascend
shell: bash -l {0}
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
- name: Run vllm-project/vllm-ascend test (light)
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'light' }}
run: |
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
@@ -176,11 +196,9 @@ jobs:
- name: Run vllm-project/vllm-ascend test (full)
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'full' }}
run: |
pytest -sv --durations=0 tests/e2e/multicard/test_quantization.py
pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
pytest -sv --durations=0 tests/e2e/multicard/test_full_graph_mode.py
pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel.py
pytest -sv --durations=0 tests/e2e/multicard/test_expert_parallel.py
@@ -259,11 +277,16 @@ jobs:
pip install -r requirements-dev.txt
pip install -v -e .
- name: Install Ascend toolkit & triton_ascend
shell: bash -l {0}
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
- name: Run vllm-project/vllm-ascend test for V1 Engine
working-directory: ./vllm-ascend
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
@@ -271,19 +294,4 @@ jobs:
pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_basic.py
pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_accuracy.py
pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_mtp.py
- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
shell: bash -l {0}
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
- name: Run vllm-project/vllm-ascend Qwen3 Next test
working-directory: ./vllm-ascend
shell: bash -el {0}
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py

View File

@@ -127,6 +127,12 @@ jobs:
python3 -m pip install -r requirements-dev.txt
python3 -m pip install -v .
- name: Install Ascend toolkit & triton_ascend
shell: bash -l {0}
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
- name: Run unit test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn