diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 0719305..d3abb6b 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -39,25 +39,17 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Prepare - run: | - REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "REPO_OWNER=${REPO_OWNER}" >> "$GITHUB_ENV" - - name: Print run: | - echo "REPO_OWNER:""${REPO_OWNER}" + lscpu - name: Docker meta id: meta uses: docker/metadata-action@v5 with: # TODO(yikun): add more hub image and a note on release policy for container image - # The REPO_OWNER will be: - # - `vllm-project` in usptream repo - # - lowercase github user in your fork repo images: | - ghcr.io/${{ env.REPO_OWNER }}/vllm-ascend + quay.io/ascend/vllm-ascend # Note for test case # https://github.com/marketplace/actions/docker-metadata-action#typeref # 1. branch job pulish per main/*-dev branch commits @@ -87,12 +79,13 @@ jobs: - name: Build - Set up Docker Buildx uses: docker/setup-buildx-action@v2 - - name: Publish - Login to GitHub Container Registry - uses: docker/login-action@v2 + - name: Publish - Login to Quay Container Registry + if: ${{ github.event_name == 'push' }} + uses: docker/login-action@v3 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + registry: quay.io + username: ${{ vars.QUAY_USERNAME }} + password: ${{ secrets.QUAY_PASSWORD }} - name: Build and push uses: docker/build-push-action@v6 @@ -104,3 +97,5 @@ jobs: push: ${{ github.event_name != 'pull_request' }} labels: ${{ steps.meta.outputs.labels }} tags: ${{ steps.meta.outputs.tags }} + build-args: | + PIP_INDEX_URL=https://pypi.org/simple diff --git a/Dockerfile b/Dockerfile index 5a77b71..9b4c075 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,8 @@ FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10 +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" + # Define environments ENV 
DEBIAN_FRONTEND=noninteractive @@ -29,7 +31,7 @@ WORKDIR /workspace COPY . /workspace/vllm-ascend/ -RUN pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple +RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM main ARG VLLM_REPO=https://github.com/vllm-project/vllm.git @@ -37,7 +39,7 @@ RUN git clone --depth 1 $VLLM_REPO /workspace/vllm RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install /workspace/vllm/ # Install vllm-ascend main -RUN python3 -m pip install /workspace/vllm-ascend/ -f https://download.pytorch.org/whl/torch/ +RUN python3 -m pip install /workspace/vllm-ascend/ --extra-index-url https://download.pytorch.org/whl/cpu/ # Install modelscope RUN python3 -m pip install modelscope diff --git a/docs/source/conf.py b/docs/source/conf.py index 4fc3809..393e674 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -68,7 +68,7 @@ myst_substitutions = { 'vllm_ascend_version': 'main', # the newest release version of vllm, used in quick start or container image tag. # This value should be updated when cut down release. - 'vllm_newest_release_version': "v0.7.1.rc1", + 'vllm_newest_release_version': "v0.7.1rc1", } # Add any paths that contain templates here, relative to this directory. 
diff --git a/docs/source/developer_guide/versioning_policy.md b/docs/source/developer_guide/versioning_policy.md index 7bde225..a290b25 100644 --- a/docs/source/developer_guide/versioning_policy.md +++ b/docs/source/developer_guide/versioning_policy.md @@ -51,7 +51,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | vllm-ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | |--------------|--------------| --- | --- | --- | -| v0.7.1.rc1 | v0.7.1 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250218 | +| v0.7.1rc1 | v0.7.1 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250218 | ## Release cadence diff --git a/docs/source/installation.md b/docs/source/installation.md index 937ae68..ea0c26c 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -119,10 +119,10 @@ You can install `vllm` and `vllm-ascend` from **pre-built wheel**: # It'll be fixed in the next vllm release, e.g. v0.7.3. git clone --branch v0.7.1 https://github.com/vllm-project/vllm cd vllm -VLLM_TARGET_DEVICE=empty pip install . -f https://download.pytorch.org/whl/torch/ +VLLM_TARGET_DEVICE=empty pip install . --extra-index-url https://download.pytorch.org/whl/cpu/ # Install vllm-ascend from pypi. -pip install vllm-ascend -f https://download.pytorch.org/whl/torch/ +pip install vllm-ascend --extra-index-url https://download.pytorch.org/whl/cpu/ # Once the packages are installed, you need to install `torch-npu` manually, # because that vllm-ascend relies on an unreleased version of torch-npu. @@ -148,11 +148,11 @@ or build from **source code**: git clone --depth 1 --branch |vllm_version| https://github.com/vllm-project/vllm cd vllm -VLLM_TARGET_DEVICE=empty pip install . -f https://download.pytorch.org/whl/torch/ +VLLM_TARGET_DEVICE=empty pip install . --extra-index-url https://download.pytorch.org/whl/cpu/ git clone --depth 1 --branch |vllm_ascend_version| https://github.com/vllm-project/vllm-ascend.git cd vllm-ascend -pip install -e . 
-f https://download.pytorch.org/whl/torch/ +pip install -e . --extra-index-url https://download.pytorch.org/whl/cpu/ ``` :::: diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 311f6e4..d3a7fd2 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -12,7 +12,7 @@ :substitutions: # You can change version a suitable one base on your requirement, e.g. main -export IMAGE=ghcr.io/vllm-project/vllm-ascend:|vllm_newest_release_version| +export IMAGE=quay.io/ascend/vllm-ascend:|vllm_newest_release_version| docker run \ --name vllm-ascend \ diff --git a/docs/source/tutorials.md b/docs/source/tutorials.md index 9bd9cb6..d400855 100644 --- a/docs/source/tutorials.md +++ b/docs/source/tutorials.md @@ -20,7 +20,7 @@ docker run \ -v /etc/ascend_install.info:/etc/ascend_install.info \ -v /root/.cache:/root/.cache \ -p 8000:8000 \ --it ghcr.io/vllm-project/vllm-ascend:v0.7.1.rc1 bash +-it quay.io/ascend/vllm-ascend:v0.7.1rc1 bash ``` Setup environment variables: @@ -83,7 +83,7 @@ docker run \ -p 8000:8000 \ -e VLLM_USE_MODELSCOPE=True \ -e PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 \ --it ghcr.io/vllm-project/vllm-ascend:v0.7.1.rc1 \ +-it quay.io/ascend/vllm-ascend:v0.7.1rc1 \ vllm serve Qwen/Qwen2.5-7B-Instruct --max_model_len 26240 ``` @@ -146,7 +146,7 @@ docker run \ -v /etc/ascend_install.info:/etc/ascend_install.info \ -v /root/.cache:/root/.cache \ -p 8000:8000 \ --it ghcr.io/vllm-project/vllm-ascend:v0.7.1.rc1 bash +-it quay.io/ascend/vllm-ascend:v0.7.1rc1 bash ``` Setup environment variables: diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 4e7d0fa..28849f6 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -1,6 +1,6 @@ # Release note -## v0.7.1.rc1 +## v0.7.1rc1 🎉 Hello, World! 
@@ -29,6 +29,6 @@ Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.7.1rc1 ### Known issues -- This release relies on an unreleased torch_npu version. It has been installed within official container image already. Please [install](https://vllm-ascend.readthedocs.io/en/v0.7.1.rc1/installation.html) it manually if you are using non-container environment. +- This release relies on an unreleased torch_npu version. It has been installed within official container image already. Please [install](https://vllm-ascend.readthedocs.io/en/v0.7.1rc1/installation.html) it manually if you are using non-container environment. - There are logs like `No platform deteced, vLLM is running on UnspecifiedPlatform` or `Failed to import from vllm._C with ModuleNotFoundError("No module named 'vllm._C'")` shown when runing vllm-ascend. It actually doesn't affect any functionality and performance. You can just ignore it. And it has been fixed in this [PR](https://github.com/vllm-project/vllm/pull/12432) which will be included in v0.7.3 soon. - There are logs like `# CPU blocks: 35064, # CPU blocks: 2730` shown when runing vllm-ascend which should be `# NPU blocks:` . It actually doesn't affect any functionality and performance. You can just ignore it. And it has been fixed in this [PR](https://github.com/vllm-project/vllm/pull/13378) which will be included in v0.7.3 soon.