[CI] Add Triton Ascend in CI (#4921)
Add triton-ascend to the unit-test (UT) and end-to-end (e2e) CI workflows.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
58
.github/workflows/_e2e_test.yaml
vendored
58
.github/workflows/_e2e_test.yaml
vendored
@@ -68,10 +68,23 @@ jobs:
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test
|
||||
- name: Run vllm-project/vllm-ascend test (non triton)
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
if: ${{ inputs.type == 'full' }}
|
||||
run: |
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
|
||||
|
||||
- name: Install Ascend toolkit & triton_ascend
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
|
||||
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test
|
||||
env:
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
if: ${{ inputs.type == 'light' }}
|
||||
run: |
|
||||
@@ -83,7 +96,6 @@ jobs:
|
||||
- name: Run e2e test
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
if: ${{ inputs.type == 'full' }}
|
||||
run: |
|
||||
@@ -92,9 +104,7 @@ jobs:
|
||||
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_async_scheduling.py
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
|
||||
pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
|
||||
# torch 2.8 doesn't work with lora, fix me
|
||||
#pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
|
||||
@@ -165,10 +175,20 @@ jobs:
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test (non triton)
|
||||
if: ${{ inputs.type == 'full' }}
|
||||
run: |
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
|
||||
|
||||
- name: Install Ascend toolkit & triton_ascend
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
|
||||
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test (light)
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
if: ${{ inputs.type == 'light' }}
|
||||
run: |
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
|
||||
@@ -176,11 +196,9 @@ jobs:
|
||||
- name: Run vllm-project/vllm-ascend test (full)
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
if: ${{ inputs.type == 'full' }}
|
||||
run: |
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_quantization.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_full_graph_mode.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_expert_parallel.py
|
||||
@@ -259,11 +277,16 @@ jobs:
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Install Ascend toolkit & triton_ascend
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
|
||||
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test for V1 Engine
|
||||
working-directory: ./vllm-ascend
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
run: |
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
|
||||
@@ -271,19 +294,4 @@ jobs:
|
||||
pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_basic.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_accuracy.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_mtp.py
|
||||
|
||||
- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
|
||||
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
|
||||
|
||||
- name: Run vllm-project/vllm-ascend Qwen3 Next test
|
||||
working-directory: ./vllm-ascend
|
||||
shell: bash -el {0}
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
run: |
|
||||
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py
|
||||
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py
|
||||
6
.github/workflows/pr_test_light.yaml
vendored
6
.github/workflows/pr_test_light.yaml
vendored
@@ -127,6 +127,12 @@ jobs:
|
||||
python3 -m pip install -r requirements-dev.txt
|
||||
python3 -m pip install -v .
|
||||
|
||||
- name: Install Ascend toolkit & triton_ascend
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
|
||||
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
|
||||
|
||||
- name: Run unit test
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
|
||||
Reference in New Issue
Block a user