[CI] Install clang in Dockerfile for triton ascend (#4409)
### What this PR does / why we need it?
Install clang in Dockerfile for triton ascend
- vLLM version: v0.13.0
- vLLM main:
d68209402d
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
8
.github/workflows/_e2e_test.yaml
vendored
8
.github/workflows/_e2e_test.yaml
vendored
@@ -228,6 +228,14 @@ jobs:
|
|||||||
#pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py
|
#pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py
|
||||||
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py
|
||||||
|
|
||||||
|
- name: Run vllm-project/vllm-ascend test (non triton)
|
||||||
|
if: ${{ inputs.type == 'full' }}
|
||||||
|
env:
|
||||||
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||||
|
run: |
|
||||||
|
python3 -m pip uninstall -y triton-ascend
|
||||||
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py
|
||||||
|
|
||||||
e2e-4-cards:
|
e2e-4-cards:
|
||||||
name: multicard-4
|
name: multicard-4
|
||||||
needs: [e2e-2-cards]
|
needs: [e2e-2-cards]
|
||||||
|
|||||||
@@ -64,6 +64,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|||||||
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|
||||||
|
# Install clang-15 (for triton-ascend)
|
||||||
|
RUN apt-get update -y && \
|
||||||
|
apt-get -y install clang-15 && \
|
||||||
|
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
|
||||||
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
|
||||||
|
rm -rf /var/cache/apt/* && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install modelscope (for fast download) and ray (for multinode)
|
# Install modelscope (for fast download) and ray (for multinode)
|
||||||
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|||||||
@@ -63,6 +63,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|||||||
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|
||||||
|
# Install clang-15 (for triton-ascend)
|
||||||
|
RUN apt-get update -y && \
|
||||||
|
apt-get -y install clang-15 && \
|
||||||
|
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
|
||||||
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
|
||||||
|
rm -rf /var/cache/apt/* && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install modelscope (for fast download) and ray (for multinode)
|
# Install modelscope (for fast download) and ray (for multinode)
|
||||||
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|||||||
@@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|||||||
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|
||||||
|
# Install clang (for triton-ascend)
|
||||||
|
RUN yum update -y && \
|
||||||
|
yum install -y clang && \
|
||||||
|
rm -rf /var/cache/yum/*
|
||||||
|
|
||||||
# Install modelscope (for fast download) and ray (for multinode)
|
# Install modelscope (for fast download) and ray (for multinode)
|
||||||
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|||||||
@@ -66,6 +66,11 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|||||||
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|
||||||
|
# Install clang (for triton-ascend)
|
||||||
|
RUN yum update -y && \
|
||||||
|
yum install -y clang && \
|
||||||
|
rm -rf /var/cache/yum/*
|
||||||
|
|
||||||
# Install modelscope (for fast download) and ray (for multinode)
|
# Install modelscope (for fast download) and ray (for multinode)
|
||||||
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
||||||
python3 -m pip cache purge
|
python3 -m pip cache purge
|
||||||
|
|||||||
@@ -31,18 +31,6 @@ If you want to deploy multi-node environment, you need to verify multi-node comm
|
|||||||
|
|
||||||
You can use our official docker image to run `DeepSeek-V3.2` directly.
|
You can use our official docker image to run `DeepSeek-V3.2` directly.
|
||||||
|
|
||||||
:::{note}
|
|
||||||
We strongly recommend installing clang to make triton ascend stable enough. For Ubuntu, the commands are:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
apt-get -y clang-15
|
|
||||||
|
|
||||||
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
||||||
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
||||||
```
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
:::::{tab-set}
|
:::::{tab-set}
|
||||||
:sync-group: install
|
:sync-group: install
|
||||||
|
|
||||||
|
|||||||
@@ -326,15 +326,7 @@ In this section, we provide simple scripts to re-produce our latest performance.
|
|||||||
- CANN 8.3.RC2
|
- CANN 8.3.RC2
|
||||||
- torch_npu 2.8.0
|
- torch_npu 2.8.0
|
||||||
- HDK/driver 25.3.RC1
|
- HDK/driver 25.3.RC1
|
||||||
- triton_ascend 3.2.0.dev2025110717
|
- triton_ascend 3.2.0
|
||||||
|
|
||||||
**Notice:**
|
|
||||||
triton_ascend is required for reproducing best performance of Qwen3-235B in vLLM-Ascend. If it is not installed in your environment, please follow the instructions below:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl
|
|
||||||
pip install triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl
|
|
||||||
```
|
|
||||||
|
|
||||||
### Single Node A3 (64G*16)
|
### Single Node A3 (64G*16)
|
||||||
|
|
||||||
|
|||||||
@@ -53,17 +53,6 @@ docker run --rm \
|
|||||||
|
|
||||||
The Qwen3 Next is using [Triton Ascend](https://gitee.com/ascend/triton-ascend) which is currently experimental. In future versions, there may be behavioral changes related to stability, accuracy, and performance improvement.
|
The Qwen3 Next is using [Triton Ascend](https://gitee.com/ascend/triton-ascend) which is currently experimental. In future versions, there may be behavioral changes related to stability, accuracy, and performance improvement.
|
||||||
|
|
||||||
### Install Clang
|
|
||||||
|
|
||||||
We strongly recommend installing clang to make triton ascend stable enough. For Ubuntu, the commands are:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
apt-get -y clang-15
|
|
||||||
|
|
||||||
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
||||||
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
||||||
```
|
|
||||||
|
|
||||||
### Inference
|
### Inference
|
||||||
|
|
||||||
:::::{tab-set}
|
:::::{tab-set}
|
||||||
|
|||||||
@@ -125,13 +125,12 @@ install_extra_components() {
|
|||||||
echo "====> Extra components installation completed"
|
echo "====> Extra components installation completed"
|
||||||
}
|
}
|
||||||
|
|
||||||
install_clang() {
|
|
||||||
echo "====> Installing clang-15"
|
show_triton_ascend_info() {
|
||||||
apt-get update && apt-get install -y clang-15
|
echo "====> Check triton ascend info"
|
||||||
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
||||||
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
||||||
clang -v
|
clang -v
|
||||||
echo "====> Clang-15 installation completed"
|
which bishengir-compile
|
||||||
|
pip show triton-ascend
|
||||||
}
|
}
|
||||||
|
|
||||||
kill_npu_processes() {
|
kill_npu_processes() {
|
||||||
@@ -161,7 +160,7 @@ main() {
|
|||||||
check_npu_info
|
check_npu_info
|
||||||
check_and_config
|
check_and_config
|
||||||
show_vllm_info
|
show_vllm_info
|
||||||
install_clang
|
show_triton_ascend_info
|
||||||
if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then
|
if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then
|
||||||
install_extra_components
|
install_extra_components
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user