From e54d294df33c35c18ccd4e6296bbcaacdf12cbe1 Mon Sep 17 00:00:00 2001 From: meihanc Date: Thu, 22 Jan 2026 19:01:28 +0800 Subject: [PATCH] [CI] Install clang in Dockerfile for triton ascend (#4409) ### What this PR does / why we need it? Install clang in Dockerfile for triton ascend - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60 Signed-off-by: Meihan-chen --- .github/workflows/_e2e_test.yaml | 8 ++++++++ Dockerfile | 8 ++++++++ Dockerfile.a3 | 8 ++++++++ Dockerfile.a3.openEuler | 5 +++++ Dockerfile.openEuler | 5 +++++ docs/source/tutorials/DeepSeek-V3.2.md | 12 ------------ docs/source/tutorials/Qwen3-235B-A22B.md | 10 +--------- docs/source/tutorials/Qwen3-Next.md | 11 ----------- tests/e2e/nightly/multi_node/scripts/run.sh | 13 ++++++------- 9 files changed, 41 insertions(+), 39 deletions(-) diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index cf88963a..a24b1cf4 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -228,6 +228,14 @@ jobs: #pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py + - name: Run vllm-project/vllm-ascend test (non triton) + if: ${{ inputs.type == 'full' }} + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + python3 -m pip uninstall -y triton-ascend + pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py + e2e-4-cards: name: multicard-4 needs: [e2e-2-cards] diff --git a/Dockerfile b/Dockerfile index aadb6ea2..2c43ba8b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,6 +64,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang-15 (for triton-ascend) +RUN apt-get update -y 
&& \ + apt-get -y install clang-15 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ + rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 960e7ef5..c6b48709 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -63,6 +63,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang-15 (for triton-ascend) +RUN apt-get update -y && \ + apt-get -y install clang-15 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ + rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index f5c83bfe..75896e8a 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang (for triton-ascend) +RUN yum update -y && \ + yum install -y clang && \ + rm -rf /var/cache/yum/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge 
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index c93cddf0..e634a3e9 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge +# Install clang (for triton-ascend) +RUN yum update -y && \ + yum install -y clang && \ + rm -rf /var/cache/yum/* + # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ python3 -m pip cache purge diff --git a/docs/source/tutorials/DeepSeek-V3.2.md b/docs/source/tutorials/DeepSeek-V3.2.md index af42abe6..a20154d2 100644 --- a/docs/source/tutorials/DeepSeek-V3.2.md +++ b/docs/source/tutorials/DeepSeek-V3.2.md @@ -31,18 +31,6 @@ If you want to deploy multi-node environment, you need to verify multi-node comm You can using our official docker image to run `DeepSeek-V3.2` directly.. -:::{note} -We strongly recommend you to install clang make triton ascend stable enough. For Ubuntu, the command is - -```bash -apt-get -y clang-15 - -update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 -update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 -``` - -::: - :::::{tab-set} :sync-group: install diff --git a/docs/source/tutorials/Qwen3-235B-A22B.md b/docs/source/tutorials/Qwen3-235B-A22B.md index 0298dab7..64ff19b8 100644 --- a/docs/source/tutorials/Qwen3-235B-A22B.md +++ b/docs/source/tutorials/Qwen3-235B-A22B.md @@ -326,15 +326,7 @@ In this section, we provide simple scripts to re-produce our latest performance. - CANN 8.3.RC2 - torch_npu 2.8.0 - HDK/driver 25.3.RC1 -- triton_ascend 3.2.0.dev2025110717 - -**Notice:** -triton_ascend is required for reproducing best performance of Qwen3-235B in vLLM-Ascend. 
If it is not installed in your environment, please follow the instructions below: - -```bash -wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl -pip install triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl -``` +- triton_ascend 3.2.0 ### Single Node A3 (64G*16) diff --git a/docs/source/tutorials/Qwen3-Next.md b/docs/source/tutorials/Qwen3-Next.md index 74341dd6..043ae359 100644 --- a/docs/source/tutorials/Qwen3-Next.md +++ b/docs/source/tutorials/Qwen3-Next.md @@ -53,17 +53,6 @@ docker run --rm \ The Qwen3 Next is using [Triton Ascend](https://gitee.com/ascend/triton-ascend) which is currently experimental. In future versions, there may be behavioral changes related to stability, accuracy, and performance improvement. -### Install Clang - -We strongly recommend you to install clang make triton ascend stable enough. For Ubuntu, the command is - -```bash -apt-get -y clang-15 - -update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 -update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 -``` - ### Inference :::::{tab-set} diff --git a/tests/e2e/nightly/multi_node/scripts/run.sh b/tests/e2e/nightly/multi_node/scripts/run.sh index 95a7b9dc..f42b325e 100644 --- a/tests/e2e/nightly/multi_node/scripts/run.sh +++ b/tests/e2e/nightly/multi_node/scripts/run.sh @@ -125,13 +125,12 @@ install_extra_components() { echo "====> Extra components installation completed" } -install_clang() { - echo "====> Installing clang-15" - apt-get update && apt-get install -y clang-15 - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 + +show_triton_ascend_info() { + echo "====> Check triton ascend info" clang -v - echo "====> Clang-15 installation completed" + which bishengir-compile + pip show triton-ascend } kill_npu_processes() { @@ 
-161,7 +160,7 @@ main() { check_npu_info check_and_config show_vllm_info - install_clang + show_triton_ascend_info if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then install_extra_components fi