diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 32a0684..df25183 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -38,7 +38,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + # TODO(yikun): Add 3.12 back when torch-npu support 3.12 + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python ${{ matrix.python-version }} @@ -47,7 +48,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install -r requirements-dev.txt + pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu - name: Checkout vllm-project/vllm repo uses: actions/checkout@v4 diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index ed0761b..d6b7ced 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -97,25 +97,10 @@ jobs: run: | VLLM_TARGET_DEVICE=empty pip install -e . - - name: Install pta - run: | - if [ ! -d /root/.cache/pta ]; then - mkdir -p /root/.cache/pta - fi - - if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then - cd /root/.cache/pta - rm -rf pytorch_v2.5.1_py310* - wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz - tar -zxvf pytorch_v2.5.1_py310.tar.gz - fi - - pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl - - name: Install vllm-project/vllm-ascend run: | pip install -r requirements-dev.txt - pip install -v --no-build-isolation -e . + pip install -v -e . 
- name: Run vllm-project/vllm-ascend test for V1 Engine env: diff --git a/CMakeLists.txt b/CMakeLists.txt index e3bbc10..2db15d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,8 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake) # Suppress potential warnings about unused manually-specified variables set(ignoreMe "${VLLM_PYTHON_PATH}") -set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12") +# TODO: Add 3.12 back when torch-npu support 3.12 +set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11") find_package(pybind11 REQUIRED) diff --git a/Dockerfile b/Dockerfile index a5c307d..021ed59 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,21 +39,21 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.8.4 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /workspace/vllm -RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install /workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. 
-RUN python3 -m pip uninstall -y triton - -# Install torch-npu -RUN bash /workspace/vllm-ascend/pta_install.sh +RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ + python3 -m pip uninstall -y triton && \ + python3 -m pip cache purge # Install vllm-ascend +# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ - export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib:$LD_LIBRARY_PATH && \ - export LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:$LIBRARY_PATH && \ - python3 -m pip install -v /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ + python3 -m pip install -v -e /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ + python3 -m pip cache purge # Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope ray +RUN python3 -m pip install modelscope ray && \ + python3 -m pip cache purge CMD ["/bin/bash"] diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index a08a566..cce429c 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -42,9 +42,6 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install /workspace/vllm/ --extra-i # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. 
RUN python3 -m pip uninstall -y triton -# Install torch-npu -RUN bash /workspace/vllm-ascend/pta_install.sh - # Install vllm-ascend RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ diff --git a/README.md b/README.md index 9bedb0c..f16f771 100644 --- a/README.md +++ b/README.md @@ -36,9 +36,9 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l - Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series - OS: Linux - Software: - * Python >= 3.9 + * Python >= 3.9, < 3.12 * CANN >= 8.0.0 - * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250320 + * PyTorch >= 2.5.1, torch-npu >= 2.5.1 * vLLM (the same version as vllm-ascend) ## Getting Started diff --git a/README.zh.md b/README.zh.md index a9d4507..5bb9b1e 100644 --- a/README.zh.md +++ b/README.zh.md @@ -39,7 +39,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP - 软件: * Python >= 3.9 * CANN >= 8.0.RC2 - * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250320 + * PyTorch >= 2.5.1, torch-npu >= 2.5.1 * vLLM (与vllm-ascend版本一致) ## 开始使用 diff --git a/docs/source/developer_guide/versioning_policy.md b/docs/source/developer_guide/versioning_policy.md index 686c376..0082acc 100644 --- a/docs/source/developer_guide/versioning_policy.md +++ b/docs/source/developer_guide/versioning_policy.md @@ -61,16 +61,22 @@ As shown above: - `version` documentation: Corresponds to specific released versions (e.g., `v0.7.3`, `v0.7.3rc1`). No further updates after release. - `stable` documentation (**not yet released**): Official release documentation. Updates are allowed in real-time after release, typically based on vX.Y.Z-dev. Once stable documentation is available, non-stable versions should display a header warning: `You are viewing the latest developer preview docs. Click here to view docs for the latest stable release.`. 
+## Software Dependency Management +- `torch-npu`: Ascend Extension for PyTorch (torch-npu) releases a stable version to [PyPI](https://pypi.org/project/torch-npu) + every 3 months, a development version (aka the POC version) every month, and a nightly version every day. + The PyPI stable version **CAN** be used in vLLM Ascend final version, the monthly dev version can **ONLY** be used in + vLLM Ascend RC version for rapid iteration, the nightly version **CANNOT** be used in any vLLM Ascend version or branch. + ## Release Compatibility Matrix Following is the Release Compatibility Matrix for vLLM Ascend Plugin: -| vllm-ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | -|--------------|--------------| --- | --- | --- | -| v0.8.4rc1 | v0.8.4 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | -| v0.7.3rc2 | v0.7.3 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | -| v0.7.3rc1 | v0.7.3 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250308 | -| v0.7.1rc1 | v0.7.1 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250218 | +| vllm-ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | +|--------------|--------------|----------------| --- | --- | +| v0.8.4rc1 | v0.8.4 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | +| v0.7.3rc2 | v0.7.3 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | +| v0.7.3rc1 | v0.7.3 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250308 | +| v0.7.1rc1 | v0.7.1 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250218 | ## Release cadence diff --git a/docs/source/installation.md b/docs/source/installation.md index 337f1fb..1c89099 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -5,15 +5,15 @@ This document describes how to install vllm-ascend manually. ## Requirements - OS: Linux -- Python: 3.9 or higher +- Python: >= 3.9, < 3.12 - A hardware with Ascend NPU. It's usually the Atlas 800 A2 series. 
- Software: - | Software | Supported version | Note | - | ------------ | ----------------- | ---- | - | CANN | >= 8.0.0 | Required for vllm-ascend and torch-npu | - | torch-npu | >= 2.5.1.dev20250320 | Required for vllm-ascend | - | torch | >= 2.5.1 | Required for torch-npu and vllm | + | Software | Supported version | Note | + |-----------|-------------------|----------------------------------------| + | CANN | >= 8.0.0 | Required for vllm-ascend and torch-npu | + | torch-npu | >= 2.5.1 | Required for vllm-ascend | + | torch | >= 2.5.1 | Required for torch-npu and vllm | You have 2 way to install: - **Using pip**: first prepare env manually or via CANN image, then install `vllm-ascend` using pip. @@ -127,27 +127,6 @@ apt update -y apt install -y gcc g++ cmake libnuma-dev wget ``` -Current version depends on a unreleased `torch-npu`, you need to install manually: - -``` -# Once the packages are installed, you need to install `torch-npu` manually, -# because that vllm-ascend relies on an unreleased version of torch-npu. -# This step will be removed in the next vllm-ascend release. -# -# Here we take python 3.10 on aarch64 as an example. Feel free to install the correct version for your environment. See: -# -# https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py39.tar.gz -# https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz -# https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py311.tar.gz -# -mkdir pta -cd pta -wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz -tar -xvf pytorch_v2.5.1_py310.tar.gz -pip install ./torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl -cd .. 
-``` - **[Optinal]** Config the extra-index of `pip` if you are working on a **x86** machine, so that the torch with cpu could be found: ```bash @@ -181,13 +160,13 @@ or build from **source code**: # Install vLLM git clone --depth 1 --branch |vllm_version| https://github.com/vllm-project/vllm cd vllm -VLLM_TARGET_DEVICE=empty pip install . +VLLM_TARGET_DEVICE=empty pip install -v -e . cd .. # Install vLLM Ascend git clone --depth 1 --branch |vllm_ascend_version| https://github.com/vllm-project/vllm-ascend.git cd vllm-ascend -python setup.py develop +pip install -v -e . cd .. ``` diff --git a/pta_install.sh b/pta_install.sh deleted file mode 100755 index d72512c..0000000 --- a/pta_install.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -ex -mkdir pta -cd pta || exit -wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz -tar -zxvf pytorch_v2.5.1_py310.tar.gz - -if [ "$(uname -i)" == "aarch64" ] -then - python3 -m pip install ./torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl -else - python3 -m pip install ./torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl --extra-index https://download.pytorch.org/whl/cpu/ -fi - -cd .. 
-rm -rf pta \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ee0a440..be4d13f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,9 @@ requires = [ "scipy", "setuptools>=64", "setuptools-scm>=8", - "torch_npu==2.5.1rc1", + "torch-npu==2.5.1", "torch>=2.5.1", "torchvision<0.21.0", + "wheel", ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index 14d038b..03702b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ pyyaml scipy setuptools>=64 setuptools-scm>=8 +torch-npu==2.5.1 torch>=2.5.1 torchvision<0.21.0 wheel diff --git a/setup.py b/setup.py index 35ebba1..ad468ca 100644 --- a/setup.py +++ b/setup.py @@ -342,11 +342,11 @@ setup( project_urls={ "Homepage": "https://github.com/vllm-project/vllm-ascend", }, + # TODO: Add 3.12 back when torch-npu support 3.12 classifiers=[ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", "License :: OSI Approved :: Apache Software License", "Intended Audience :: Developers", "Intended Audience :: Information Technology", diff --git a/vllm_ascend/models/qwen2_vl.py b/vllm_ascend/models/qwen2_vl.py index 7eba681..584205e 100644 --- a/vllm_ascend/models/qwen2_vl.py +++ b/vllm_ascend/models/qwen2_vl.py @@ -86,7 +86,7 @@ class CustomQwen2VisionAttention(Qwen2VisionAttention): context_layer = torch.torch.empty_like(q) - # operator requires pta version >= 2.5.1.dev20250226 + # operator requires pta version >= 2.5.1 torch_npu._npu_flash_attention_unpad( query=q, key=k,