vllm-ascend vnpu v1

This commit is contained in:
starkwj
2025-12-26 07:37:35 +00:00
parent 2f1aed98cc
commit 135cc0a505
168 changed files with 28337 additions and 9 deletions

View File

@@ -0,0 +1,45 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
ARG PY_VERSION=3.11
FROM quay.io/ascend/manylinux:8.2.rc1-910b-manylinux_2_28-py${PY_VERSION}
ARG COMPILE_CUSTOM_KERNELS=1
# Define environments
ENV DEBIAN_FRONTEND=noninteractive
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
RUN yum update -y && \
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
rm -rf /var/cache/yum
WORKDIR /workspace
COPY . /workspace/vllm-ascend/
# Install req
RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
python3 -m pip install twine
# Install vllm-ascend
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
cd vllm-ascend && \
python3 setup.py bdist_wheel && \
ls -l dist
CMD ["/bin/bash"]

View File

@@ -0,0 +1,22 @@
name: 📚 Documentation
description: Report an issue related to https://vllm-ascend.readthedocs.org
title: "[Doc]: "
labels: ["documentation"]
body:
- type: textarea
attributes:
label: 📚 The doc issue
description: >
A clear and concise description of what content in https://vllm-ascend.readthedocs.org/ is an issue.
validations:
required: true
- type: textarea
attributes:
label: Suggest a potential alternative/fix
description: >
Tell us how we could improve the documentation in this regard.
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,37 @@
name: 📚 User Story
description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
title: "[User Story]: "
labels: ["user-story"]
body:
- type: textarea
attributes:
label: 📚 Title
description: >
A clear title about what your user story is about.
validations:
required: true
- type: textarea
attributes:
label: About / Introduction
description: >
A brief introduction about the background of your use case, like your scenario, hardware size etc.
- type: textarea
attributes:
label: Bussiness Challenges
description: >
Tell us how what kind of challenge you faced in this user story.
- type: textarea
attributes:
label: Solving challenges with vLLM Ascend and benefits
description: >
Tell us how vLLM Ascend helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc. And what kind of benefit do you get from using vLLM Ascend
- type: textarea
attributes:
label: Extra Info
description: >
Any extra infomation you want to include in this story
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,42 @@
name: 🛠️ Installation
description: Report an issue here when you hit errors during installation.
title: "[Installation]: "
labels: ["installation"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
attributes:
label: Your current environment
description: |
Please run the following and paste the output below.
```sh
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
# For security purposes, please feel free to check the contents of collect_env.py before running it.
python collect_env.py
```
It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues.
value: |
```text
The output of `python collect_env.py`
```
validations:
required: true
- type: textarea
attributes:
label: How you are installing vllm and vllm-ascend
description: |
Paste the full command you are trying to execute.
value: |
```sh
pip install -vvv vllm vllm-ascend
```
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,40 @@
name: 💻 Usage
description: Raise an issue here if you don't know how to use vllm on Ascend.
title: "[Usage]: "
labels: ["usage"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
attributes:
label: Your current environment
description: |
Please run the following and paste the output below.
```sh
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
# For security purposes, please feel free to check the contents of collect_env.py before running it.
python collect_env.py
```
It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues.
value: |
```text
The output of above commands
```
validations:
required: true
- type: textarea
attributes:
label: How would you like to use vllm on ascend
description: |
A detailed description of how you want to use vllm on ascend.
value: |
I want to run inference of a [specific model](put link here). I don't know how to integrate it with vllm.
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,81 @@
name: 🐛 Bug report
description: Raise an issue here if you find a bug.
title: "[Bug]: "
labels: ["bug"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
attributes:
label: Your current environment
description: |
Please run the following and paste the output below.
```sh
wget https://raw.githubusercontent.com/vllm-project/vllm-ascend/main/collect_env.py
# For security purposes, please feel free to check the contents of collect_env.py before running it.
python collect_env.py
```
It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues.
value: |
<details>
<summary>The output of `python collect_env.py`</summary>
```text
Your output of above commands here
```
</details>
validations:
required: true
- type: textarea
attributes:
label: 🐛 Describe the bug
description: |
Please provide a clear and concise description of what the bug is.
If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example:
```python
from vllm import LLM, SamplingParams
prompts = [
"Hello, my name is",
"The president of the United States is",
"The capital of France is",
"The future of AI is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
```
If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.
Please set the environment variable `export VLLM_LOGGING_LEVEL=DEBUG` to turn on more logging to help debugging potential issues.
If you experienced crashes or hangs, it would be helpful to run vllm with `export VLLM_TRACE_FUNCTION=1` . All the function calls in vllm will be recorded. Inspect these log files, and tell which function crashes or hangs.
placeholder: |
A clear and concise description of what the bug is.
```python
# Sample code to reproduce the problem
```
```
The error message you got, with the full traceback.
```
validations:
required: true

View File

@@ -0,0 +1,31 @@
name: 🚀 Feature request
description: Submit a proposal/request for a new vllm-ascend feature
title: "[Feature]: "
labels: ["feature request"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
attributes:
label: 🚀 The feature, motivation and pitch
description: >
A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too.
validations:
required: true
- type: textarea
attributes:
label: Alternatives
description: >
A description of any alternative solutions or features you've considered, if any.
- type: textarea
attributes:
label: Additional context
description: >
Add any other context or screenshots about the feature request.
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,33 @@
name: 🤗 Support request for new model supported from huggingface/modelscope/modelers on Ascend
description: Submit a proposal/request for a new model from huggingface/modelscope/modelers on Ascend
title: "[New Model]: "
labels: ["new model"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
#### We also highly recommend you read https://vllm-ascend.readthedocs.io/en/latest/user_guide/supported_models.html first to know which model already supported.
- type: textarea
attributes:
label: The model to consider.
description: >
A huggingface/modelscope/modelers url, pointing to the model, e.g. https://huggingface.co/openai-community/gpt2 .
validations:
required: true
- type: textarea
attributes:
label: The closest model vllm already supports.
description: >
Here is the list of models already supported by vllm: https://vllm-ascend.readthedocs.io/en/latest/user_guide/supported_models.html . Which model is the most similar to the model you want to add support for?
- type: textarea
attributes:
label: What's your difficulty of supporting the model you want?
description: >
For example, any new operators or new architecture?
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,54 @@
name: ⚡ Discussion on the performance of vllm-ascend
description: Submit a proposal/discussion about the performance of vllm-ascend
title: "[Performance]: "
labels: ["performance"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
attributes:
label: Proposal to improve performance
description: >
How do you plan to improve vllm-ascend's performance?
validations:
required: false
- type: textarea
attributes:
label: Report of performance regression
description: >
Please provide detailed description of performance comparison to confirm the regression. You may want to run the benchmark script at https://github.com/vllm-project/vllm/tree/main/benchmarks .
validations:
required: false
- type: textarea
attributes:
label: Misc discussion on performance
description: >
Anything about the performance.
validations:
required: false
- type: textarea
attributes:
label: Your current environment (if you think it is necessary)
description: |
Please run the following and paste the output below.
```sh
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
# For security purposes, please feel free to check the contents of collect_env.py before running it.
python collect_env.py
```
It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues.
value: |
```text
The output of `python collect_env.py`
```
validations:
required: false
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,49 @@
name: 💬 Request for comments (RFC).
description: Ask for feedback on major architectural changes or design choices.
title: "[RFC]: "
labels: ["RFC"]
body:
- type: markdown
attributes:
value: >
#### Please take a look at previous [RFCs](https://github.com/vllm-project/vllm-ascend/issues?q=label%3ARFC+sort%3Aupdated-desc) for reference.
- type: textarea
attributes:
label: Motivation.
description: >
The motivation of the RFC.
validations:
required: true
- type: textarea
attributes:
label: Proposed Change.
description: >
The proposed change of the RFC.
validations:
required: true
- type: textarea
attributes:
label: Feedback Period.
description: >
The feedback period of the RFC. Usually at least one week.
validations:
required: false
- type: textarea
attributes:
label: CC List.
description: >
The list of people you want to CC.
validations:
required: false
- type: textarea
attributes:
label: Any Other Things.
description: >
Any other things you would like to mention, such as feature branch request.
validations:
required: false
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,21 @@
name: 🎲 Others
description: Submit a discussion as you like. Note that developers are heavily overloaded and we mainly rely on community users to answer these issues.
title: "[Misc]: "
labels: ["misc"]
body:
- type: markdown
attributes:
value: >
#### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
attributes:
label: Anything you want to discuss about vllm on ascend.
description: >
Anything you want to discuss about vllm on ascend.
validations:
required: true
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!

View File

@@ -0,0 +1,104 @@
name: Release Checklist
description: Generate a release checklist issue when prepare a new release.(Used for release team)
title: "[Release]: Release checklist for v"
body:
- type: textarea
attributes:
description: >
Brief info for the new release.
label: Release Checklist
value: >
**Release Version**:
**Release Branch**:
**Release Date**:
**Release Manager**:
- type: textarea
attributes:
description: >
Release notes.
label: Prepare Release Note
value: >
- [ ] Create a new issue for release feedback
- [ ] Upgrade vllm version to the new version for CI and Dockerfile
- [ ] Write the release note PR.
- [ ] Update the feedback issue link in docs/source/faqs.md
- [ ] Add release note to docs/source/user_guide/release_notes.md
- [ ] Update release version in README.md and README.zh.md
- [ ] Update version info in docs/source/community/versioning_policy.md
- [ ] Update contributor info in docs/source/community/contributors.md
- [ ] Update package version in docs/conf.py
- type: textarea
attributes:
description: >
Make sure the code is merged.
label: PR need Merge
value: >
- [ ] PR link1
- [ ] PR link2
- [ ] ...
- type: textarea
attributes:
description: >
Make sure the new Feature/Function is tested
label: Functional Test
value: >
- [ ] Feature1
- [ ] Bug1
- [ ] ...
- type: textarea
attributes:
description: >
Make sure the doc is updated.
label: Doc Test
value: >
- [ ] Tutorial is updated.
- [ ] User Guide is updated.
- [ ] Developer Guide is updated.
- type: textarea
attributes:
description: >
Make sure the artifacts is ready
label: Prepare Artifacts
value: >
- [ ] Docker image is ready.
- [ ] Wheel package is ready.
- type: textarea
attributes:
description: >
Start to release.
label: Release Step
value: >
- [ ] Release note PR is merged.
- [ ] Post the release on GitHub release page.
- [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
- [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
- [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
- [ ] Upload 310p wheel to Github release page
- [ ] Broadcast the release news (By message, blog , etc)
- [ ] Close this issue

View File

@@ -0,0 +1 @@
blank_issues_enabled: false

View File

@@ -0,0 +1,27 @@
<!-- Thanks for sending a pull request!
BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html
-->
### What this PR does / why we need it?
<!--
- Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue.
If possible, please consider writing useful notes for better and faster reviews in your PR.
- Please clarify why the changes are needed. For instance, the use case and bug description.
- Fixes #
-->
### Does this PR introduce _any_ user-facing change?
<!--
Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes.
Documentation-only updates are not considered user-facing changes.
-->
### How was this patch tested?
<!--
CI passed with new added/existing test.
If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
If tests were not added, please describe why they were not added and/or why it was difficult to add.
-->

View File

@@ -0,0 +1,21 @@
self-hosted-runner:
# Labels of self-hosted runner in array of strings.
labels:
- linux-aarch64-a2-0
- linux-aarch64-a2-1
- linux-aarch64-a2-2
- linux-aarch64-a2-4
- linux-aarch64-a2-8
- linux-arm64-npu-static-8
- linux-aarch64-310p-1
- linux-aarch64-310p-2
- linux-aarch64-310p-4
- ubuntu-24.04-arm
- linux-aarch64-a3-1
- linux-aarch64-a3-2
- linux-aarch64-a3-4
- linux-aarch64-a3-8
- linux-amd64-cpu-0
- linux-amd64-cpu-8
- linux-amd64-cpu-16
- linux-aarch64-a3-0

View File

@@ -0,0 +1,10 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
# Check for updates to GitHub Actions every week
interval: "weekly"
open-pull-requests-limit: 2
reviewers:
- "Yikun"

View File

@@ -0,0 +1,59 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
# Adapted from vllm/.github/scripts/cleanup_pr_body.sh
#!/bin/bash
set -eux
# ensure 2 argument is passed
if [ "$#" -ne 3 ]; then
echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
exit 1
fi
PR_NUMBER=$1
VLLM_VERSION=$2
VLLM_COMMIT=$3
OLD=/tmp/orig_pr_body.txt
NEW=/tmp/new_pr_body.txt
FINAL=/tmp/final_pr_body.txt
gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
cp "${OLD}" "${NEW}"
# Remove notes in pr description and add vLLM version and commit
sed -i '/<!--/,/-->/d' "${NEW}"
sed -i '/- vLLM .*$/d' "${NEW}"
{
echo ""
echo "- vLLM version: $VLLM_VERSION"
echo "- vLLM main: $VLLM_COMMIT"
} >> "${NEW}"
# Remove redundant empty lines
uniq "${NEW}" > "${FINAL}"
# Run this only if ${NEW} is different than ${OLD}
if ! cmp -s "${OLD}" "${FINAL}"; then
echo
echo "Updating PR body:"
echo
cat "${NEW}"
gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
else
echo "No changes needed"
fi

View File

@@ -0,0 +1,38 @@
---
documentation:
- changed-files:
- any-glob-to-any-file:
- 'docs/**'
- '**/*.md'
ci/build:
- changed-files:
- any-glob-to-any-file:
- '.github/actions/*.yml'
- '.github/workflows/*.yml'
'module:tests':
- changed-files:
- any-glob-to-any-file:
- 'tests/**'
'module:tools':
- changed-files:
- any-glob-to-any-file:
- 'tools/**'
'module:ops':
- changed-files:
- any-glob-to-any-file:
- 'vllm_ascend/ops/**'
'module:quantization':
- changed-files:
- any-glob-to-any-file:
- 'vllm_ascend/quantization/**'
'module:core':
- changed-files:
- any-glob-to-any-file:
- 'vllm_ascend/*.py'

View File

@@ -0,0 +1,175 @@
name: 'accuracy test'
on:
workflow_call:
inputs:
vllm:
required: true
type: string
vllm-ascend:
required: false
type: string
default: main
runner:
required: true
type: string
image:
required: true
type: string
model_name:
required: true
type: string
upload:
required: false
type: boolean
default: false
jobs:
accuracy_tests:
runs-on: ${{ inputs.runner }}
name: ${{ inputs.model_name }} accuracy
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
env:
VLLM_USE_MODELSCOPE: True
# 1. If version specified (work_dispatch), do specified branch accuracy test
# 2. If no version (labeled PR), do accuracy test by default ref:
# The branch, tag or SHA to checkout. When checking out the repository that
# triggered a workflow, this defaults to the reference or SHA for that event.
# Otherwise, uses the default branch.
GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set model name as output
id: set_output
run: |
echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
- name: Config mirrors
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
apt-get update -y
apt install git -y
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ inputs.vllm }}
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Resolve vllm-ascend version
run: |
VERSION_INPUT="${{ inputs.vllm-ascend }}"
if [[ "$VERSION_INPUT" == "latest" ]]; then
TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
LATEST_TAG=$(echo "$TAGS" | head -n1)
if [[ -z "$LATEST_TAG" ]]; then
RESOLVED_VERSION="main"
else
RESOLVED_VERSION="$LATEST_TAG"
fi
else
RESOLVED_VERSION="$VERSION_INPUT"
fi
echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm-ascend
path: ./vllm-ascend
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
- name: Install vllm-project/vllm-ascend
working-directory: ./vllm-ascend
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
run: |
pip install -r requirements-dev.txt
pip install -v -e .
- name: Get vLLM commit hash and URL
working-directory: ./vllm-empty
run: |
VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
- name: Get vLLM-Ascend commit hash and URL
working-directory: ./vllm-ascend
run: |
VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
- name: Collect version info
run: |
for dir in /usr/local/Ascend/ascend-toolkit/*; do
dname=$(basename "$dir")
if [ "$dname" != "latest" ]; then
TOOLKIT_DIR="$dname"
break
fi
done
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
| head -n1 \
| cut -d'=' -f2 \
| tr -d '"')
{
echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
} >> "$GITHUB_ENV"
- name: Run accuracy test
id: report
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
run: |
model_base_name=$(basename ${{ inputs.model_name }})
markdown_name="${model_base_name}"
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
mkdir -p ./benchmarks/accuracy
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
--config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
- name: Generate step summary
if: ${{ always() }}
run: |
cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
- name: Upload Report
if: ${{ inputs.upload == true }}
uses: actions/upload-artifact@v4
with:
name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
if-no-files-found: warn
retention-days: 90
overwrite: true

View File

@@ -0,0 +1,199 @@
name: 'e2e test'
on:
workflow_call:
inputs:
vllm:
required: true
type: string
runner:
required: true
type: string
image:
required: true
type: string
type:
required: true
type: string
jobs:
e2e:
name: singlecard
runs-on: ${{ inputs.runner }}-1
container:
image: ${{ inputs.image }}
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
apt-get update -y
apt install git -y
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ inputs.vllm }}
path: ./vllm-empty
fetch-depth: 1
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
run: |
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
if: ${{ inputs.type == 'light' }}
run: |
pytest -sv tests/e2e/singlecard/test_aclgraph.py
pytest -sv tests/e2e/singlecard/test_quantization.py
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
- name: Run e2e test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
if: ${{ inputs.type == 'full' }}
run: |
# We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
# the test separately.
pytest -sv tests/e2e/singlecard/test_aclgraph.py
pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
pytest -sv tests/e2e/singlecard/test_bge_model.py
pytest -sv tests/e2e/singlecard/test_camem.py
pytest -sv tests/e2e/singlecard/test_chunked.py
pytest -sv tests/e2e/singlecard/test_embedding.py
pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
pytest -sv tests/e2e/singlecard/test_quantization.py
pytest -sv tests/e2e/singlecard/test_sampler.py
pytest -sv tests/e2e/singlecard/test_vlm.py
# ------------------------------------ v1 spec decode test ------------------------------------ #
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
# Fix me: test_eagle_correctness OOM error
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
pytest -sv tests/e2e/singlecard/ops/
e2e-2-cards:
name: multicard
runs-on: ${{ inputs.runner }}-2
container:
image: ${{ inputs.image }}
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
apt-get update -y
apt install git -y
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ inputs.vllm }}
path: ./vllm-empty
fetch-depth: 1
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
run: |
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test (light)
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'light' }}
run: |
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
- name: Run vllm-project/vllm-ascend test (full)
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'full' }}
run: |
pytest -sv tests/e2e/multicard/test_data_parallel.py
pytest -sv tests/e2e/multicard/test_expert_parallel.py
pytest -sv tests/e2e/multicard/test_external_launcher.py
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
# To avoid oom, we need to run the test in a single process.
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_new_version
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_old_version
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
pytest -sv tests/e2e/multicard/test_prefix_caching.py
pytest -sv tests/e2e/multicard/test_qwen3_moe.py
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py

View File

@@ -0,0 +1,72 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# This test will be triggered:
# - PR labeled with: 'accuracy-test' & 'ready-for-test'
name: ascend test / accuracy
on:
pull_request:
branches:
- 'main'
- '*-dev'
types: [ labeled, synchronize ]
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
run:
name: ""
strategy:
matrix:
# Only top series models should be listed in here
include:
- runner: a2-1
model_name: Qwen3-8B
- runner: a2-1
model_name: Qwen2.5-VL-7B-Instruct
- runner: a2-1
model_name: Qwen2-Audio-7B-Instruct
- runner: a2-2
model_name: Qwen3-30B-A3B
- runner: a2-2
model_name: Qwen3-VL-30B-A3B-Instruct
- runner: a2-2
model_name: DeepSeek-V2-Lite
fail-fast: false
# test will be triggered when tag 'accuracy-test' & 'ready-for-test'
if: >-
${{
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
contains(github.event.pull_request.labels.*.name, 'ready-for-test')
}}
uses: ./.github/workflows/_accuracy_test.yaml
with:
vllm: v0.11.0
runner: linux-aarch64-${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
model_name: ${{ matrix.model_name }}

View File

@@ -0,0 +1,57 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: format / pr body
on:
# The PR updated when PR opened and push new commits
pull_request_target:
types: [opened, synchronize]
branches:
- 'main'
permissions:
pull-requests: write
jobs:
update-description:
name: update vLLM version
runs-on: ubuntu-latest
steps:
- name: Get vLLM version
run: |
VLLM_COMMIT=v0.11.0
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
- name: Checkout repository
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
- name: Get vLLM release version
run: |
VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
- name: Update PR description
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"

View File

@@ -0,0 +1,135 @@
name: 'image / openEuler / 310p'
# This is a docker build check and publish job:
# 1. PR Triggered docker image build check
# - is for image build check
# - Enable on main/*-dev branch
# - push: ${{ github.event_name != 'pull_request' }} ==> false
# 2. branches push trigger image publish
# - is for branch/dev/nightly image
# - commits are merge into main/*-dev ==> vllm-ascend:main-310p-openeuler / vllm-ascend:*-dev-310p-openeuler
# 3. tags push trigger image publish
# - is for final release image
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p-openeuler / vllm-ascend:v1.2.3rc1-310p-openeuler
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image_310p_openeuler.yml'
- 'Dockerfile.310p.openEuler'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
types: [ labeled ]
push:
# Publish image when tagging, the Dockerfile in tag will be build as tag image
branches:
- 'main'
- '*-dev'
tags:
- 'v*'
paths:
- '.github/workflows/image_310p_openeuler.yml'
- 'Dockerfile.310p.openEuler'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: vllm-ascend image build
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
# Push event or PR with both 'ready' and 'ready-for-test' labels
runs-on: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'ubuntu-latest' ||
'ubuntu-24.04-arm'
}}
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Print
run: |
lscpu
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# TODO(yikun): add more hub image and a note on release policy for container image
images: |
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N-310p-openeuler is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1-310p-openeuler
# - pre/post/dev: v0.7.1rc1-310p-openeuler/v0.7.1rc1-310p-openeuler/v0.7.1rc1.dev1-310p-openeuler/v0.7.1.post1-310p-openeuler, no latest
# which follow the rule from vLLM with prefix v
# TODO(yikun): the post release might be considered as latest release
tags: |
type=ref,event=branch,suffix=-310p-openeuler
type=ref,event=pr,suffix=-310p-openeuler
type=pep440,pattern={{raw}},suffix=-310p-openeuler
flavor:
latest=false
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Publish - Login to Quay Container Registry
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build and push 310p
uses: docker/build-push-action@v6
with:
platforms: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'linux/amd64,linux/arm64' ||
'linux/arm64'
}}
# use the current repo path as the build context, ensure .git is contained
context: .
# only trigger when tag, branch/main push
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
file: Dockerfile.310p.openEuler
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
provenance: false

View File

@@ -0,0 +1,131 @@
name: 'image / Ubuntu / 310p'
# This is a docker build check and publish job:
# 1. PR Triggered docker image build check
# - is for image build check
# - Enable on main/*-dev branch
# - push: ${{ github.event_name != 'pull_request' }} ==> false
# 2. branches push trigger image publish
# - is for branch/dev/nightly image
# - commits are merge into main/*-dev ==> vllm-ascend:main-310p / vllm-ascend:*-dev-310p
# 3. tags push trigger image publish
# - is for final release image
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p / vllm-ascend:v1.2.3rc1-310p
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image_310p_ubuntu.yml'
- 'Dockerfile.310p'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
types: [ labeled ]
push:
# Publish image when tagging, the Dockerfile in tag will be build as tag image
branches:
- 'main'
- '*-dev'
tags:
- 'v*'
paths:
- '.github/workflows/image_310p_ubuntu.yml'
- 'Dockerfile.310p'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: vllm-ascend image build
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
# Push event or PR with both 'ready' and 'ready-for-test' labels
runs-on: ubuntu-latest
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Print
run: |
lscpu
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# TODO(yikun): add more hub image and a note on release policy for container image
images: |
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1-310p
# - pre/post/dev: v0.7.1rc1-310p/v0.7.1rc1-310p/v0.7.1rc1.dev1-310p/v0.7.1.post1-310p, no latest
# which follow the rule from vLLM with prefix v
# TODO(yikun): the post release might be considered as latest release
tags: |
type=ref,event=branch,suffix=-310p
type=ref,event=pr,suffix=-310p
type=pep440,pattern={{raw}},suffix=-310p
flavor:
latest=false
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Publish - Login to Quay Container Registry
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build and push 310p
uses: docker/build-push-action@v6
with:
platforms: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'linux/amd64,linux/arm64' ||
'linux/amd64'
}}
# use the current repo path as the build context, ensure .git is contained
context: .
file: Dockerfile.310p
# only trigger when tag, branch/main push
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
provenance: false

View File

@@ -0,0 +1,135 @@
name: 'image / openEuler / a3'
# This is a docker build check and publish job:
# 1. PR Triggered docker image build check
# - is for image build check
# - Enable on main/*-dev branch
# - push: ${{ github.event_name != 'pull_request' }} ==> false
# 2. branches push trigger image publish
# - is for branch/dev/nightly image
# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
# 3. tags push trigger image publish
# - is for final release image
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-a3-openeuler / vllm-ascend:v1.2.3rc1-a3-openeuler
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image_a3_openeuler.yml'
- 'Dockerfile.a3.openEuler'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
types: [ labeled ]
push:
# Publish image when tagging, the Dockerfile in tag will be build as tag image
branches:
- 'main'
- '*-dev'
tags:
- 'v*'
paths:
- '.github/workflows/image_a3_openeuler.yml'
- 'Dockerfile.a3.openEuler'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: vllm-ascend image build
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
# Push event or PR with both 'ready' and 'ready-for-test' labels
runs-on: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'ubuntu-latest' ||
'ubuntu-24.04-arm'
}}
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Print
run: |
lscpu
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# TODO(yikun): add more hub image and a note on release policy for container image
images: |
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N-a3-openeuler is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1-a3-openeuler
# - pre/post/dev: v0.7.1rc1-a3-openeuler/v0.7.1rc1-a3-openeuler/v0.7.1rc1.dev1-a3-openeuler/v0.7.1.post1-a3-openeuler, no latest
# which follow the rule from vLLM with prefix v
# TODO(yikun): the post release might be considered as latest release
tags: |
type=ref,event=branch,suffix=-a3-openeuler
type=ref,event=pr,suffix=-a3-openeuler
type=pep440,pattern={{raw}},suffix=-a3-openeuler
flavor:
latest=false
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Publish - Login to Quay Container Registry
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build and push a3
uses: docker/build-push-action@v6
with:
platforms: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'linux/amd64,linux/arm64' ||
'linux/arm64'
}}
# use the current repo path as the build context, ensure .git is contained
context: .
# only trigger when tag, branch/main push
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
file: Dockerfile.a3.openEuler
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
provenance: false

View File

@@ -0,0 +1,131 @@
name: 'image / Ubuntu / a3'
# This is a docker build check and publish job:
# 1. PR Triggered docker image build check
# - is for image build check
# - Enable on main/*-dev branch
# - push: ${{ github.event_name != 'pull_request' }} ==> false
# 2. branches push trigger image publish
# - is for branch/dev/nightly image
# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
# 3. tags push trigger image publish
# - is for final release image
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-a3|vllm-ascend:v1.2.3rc1-a3
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image_a3_ubuntu.yml'
- 'Dockerfile.a3'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
types: [ labeled ]
push:
# Publish image when tagging, the Dockerfile in tag will be build as tag image
branches:
- 'main'
- '*-dev'
tags:
- 'v*'
paths:
- '.github/workflows/image_a3_ubuntu.yml'
- 'Dockerfile.a3'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: vllm-ascend image build
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
# Push event or PR with both 'ready' and 'ready-for-test' labels
runs-on: ubuntu-latest
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Print
run: |
lscpu
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# TODO(yikun): add more hub image and a note on release policy for container image
images: |
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N-a3 is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1-a3
# - pre/post/dev: v0.7.1rc1-a3/v0.7.1rc1-a3/v0.7.1rc1.dev1-a3/v0.7.1.post1-a3, no latest
# which follow the rule from vLLM with prefix v
# TODO(yikun): the post release might be considered as latest release
tags: |
type=ref,event=branch,suffix=-a3
type=ref,event=pr,suffix=-a3
type=pep440,pattern={{raw}},suffix=-a3
flavor:
latest=false
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Publish - Login to Quay Container Registry
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build and push a3
uses: docker/build-push-action@v6
with:
platforms: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'linux/amd64,linux/arm64' ||
'linux/amd64'
}}
# use the current repo path as the build context, ensure .git is contained
context: .
file: Dockerfile.a3
# only trigger when tag, branch/main push
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
provenance: false

View File

@@ -0,0 +1,134 @@
name: 'image / openEuler'
# This is a docker build check and publish job:
# 1. PR Triggered docker image build check
# - is for image build check
# - Enable on main/*-dev branch
# - push: ${{ github.event_name != 'pull_request' }} ==> false
# 2. branches push trigger image publish
# - is for branch/dev/nightly image
# - commits are merge into main/*-dev ==> vllm-ascend:main-openeuler / vllm-ascend:*-dev-openeuler
# - is for final release image
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler / vllm-ascend:v1.2.3rc1-openeuler
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image_openeuler.yml'
- 'Dockerfile.openEuler'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
types: [ labeled ]
push:
# Publish image when tagging, the Dockerfile in tag will be build as tag image
branches:
- 'main'
- '*-dev'
tags:
- 'v*'
paths:
- '.github/workflows/image_openeuler.yml'
- 'Dockerfile.openEuler'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: vllm-ascend image build
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
# Push event or PR with both 'ready' and 'ready-for-test' labels
runs-on: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'ubuntu-latest' ||
'ubuntu-24.04-arm'
}}
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Print
run: |
lscpu
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# TODO(yikun): add more hub image and a note on release policy for container image
images: |
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1-openeuler
# - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest
# which follow the rule from vLLM with prefix v
# TODO(yikun): the post release might be considered as latest release
tags: |
type=ref,event=branch,suffix=-openeuler
type=ref,event=pr,suffix=-openeuler
type=pep440,pattern={{raw}},suffix=-openeuler
flavor:
latest=true
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Publish - Login to Quay Container Registry
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build and push 910b
uses: docker/build-push-action@v6
with:
platforms: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'linux/amd64,linux/arm64' ||
'linux/arm64'
}}
# use the current repo path as the build context, ensure .git is contained
context: .
# only trigger when tag, branch/main push
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
file: Dockerfile.openEuler
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
provenance: false

View File

@@ -0,0 +1,131 @@
name: 'image / Ubuntu'
# This is a docker build check and publish job:
# 1. PR Triggered docker image build check
# - is for image build check
# - Enable on main/*-dev branch
# - push: ${{ github.event_name != 'pull_request' }} ==> false
# 2. branches push trigger image publish
# - is for branch/dev/nightly image
# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
# 3. tags push trigger image publish
# - is for final release image
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image_ubuntu.yml'
- 'Dockerfile'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
types: [ labeled ]
push:
# Publish image when tagging, the Dockerfile in tag will be build as tag image
branches:
- 'main'
- '*-dev'
tags:
- 'v*'
paths:
- '.github/workflows/image_ubuntu.yml'
- 'Dockerfile'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: vllm-ascend image build
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
# Push event or PR with both 'ready' and 'ready-for-test' labels
runs-on: ubuntu-latest
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Print
run: |
lscpu
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# TODO(yikun): add more hub image and a note on release policy for container image
images: |
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1, latest
# - pre/post/dev: v0.7.1rc1/v0.7.1rc1/v0.7.1rc1.dev1/v0.7.1.post1, no latest
# which follow the rule from vLLM with prefix v
# TODO(yikun): the post release might be considered as latest release
tags: |
type=ref,event=branch
type=ref,event=pr
type=pep440,pattern={{raw}}
flavor:
latest=true
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Publish - Login to Quay Container Registry
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build and push 910b
uses: docker/build-push-action@v6
with:
platforms: >-
${{
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
'linux/amd64,linux/arm64' ||
'linux/amd64'
}}
# use the current repo path as the build context, ensure .git is contained
context: .
file: Dockerfile
# only trigger when tag, branch/main push
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
provenance: false

View File

@@ -0,0 +1,20 @@
name: "Merge Conflict Labeler"
on:
# So that PRs touching the same files as the push are updated
push:
# So that the `dirtyLabel` is removed if conflicts are resolve
# We recommend `pull_request_target` so that github secrets are available.
# In `pull_request` we wouldn't be able to change labels of fork PRs
pull_request_target:
types: [synchronize]
jobs:
main:
runs-on: ubuntu-latest
steps:
- name: check if prs are dirty
uses: eps1lon/actions-label-merge-conflict@v3
with:
dirtyLabel: "merge-conflicts"
repoToken: "${{ secrets.GITHUB_TOKEN }}"
commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."

View File

@@ -0,0 +1,18 @@
name: Pull Request Labeler
on: pull_request_target
jobs:
label:
name: Label
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- name: Label the PR
uses: actions/labeler@v6
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
configuration-path: .github/labeler.yml
sync-labels: true

View File

@@ -0,0 +1,17 @@
{
"problemMatcher": [
{
"owner": "actionlint",
"pattern": [
{
"regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$",
"file": 1,
"line": 2,
"column": 3,
"message": 4,
"code": 5
}
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"problemMatcher": [
{
"owner": "mypy",
"pattern": [
{
"regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$",
"file": 1,
"line": 2,
"severity": 3,
"message": 4
}
]
}
]
}

View File

@@ -0,0 +1,17 @@
{
"problemMatcher": [
{
"owner": "ruff",
"pattern": [
{
"regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
"file": 1,
"line": 2,
"column": 3,
"code": 4,
"message": 5
}
]
}
]
}

View File

@@ -0,0 +1,118 @@
name: 'e2e test / multi-dp'
on:
schedule:
- cron: "0 */4 * * *"
workflow_dispatch:
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
# and ignore the lint / 8 cards test type
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
e2e:
# This is a runner with no NPU for k8s controller
runs-on: linux-aarch64-a3-0
container:
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
env:
KUBECONFIG: /tmp/kubeconfig
KUBECTL: /root/.cache/.kube/kubectl
NAMESPACE: vllm-project
LEADER_POD: vllm-0
steps:
- name: Install system denpendencies
run: |
# configure apt and pip source
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
apt-get update -y && apt-get install -y git curl
TOKEN=`echo -n "x-access-token:${{ secrets.ADMIN_PTA }}" | base64`
git config --global http.https://gh-proxy.test.osinfra.cn/.extraheader "AUTHORIZATION: basic $TOKEN"
- name: Install kubectl
run: |
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
# get kubeconfig from secret
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
- name: Checkout code
uses: actions/checkout@v4
- name: Prepare scripts
run: |
# prepare for lws entrypoint scripts
install -D tests/e2e/multi_node/scripts/run.sh /root/.cache/tests/run.sh
- name: Launch cluster
run: |
kubectl apply -f tests/e2e/multi_node/scripts/lws.yaml
- name: Waiting for pod ready
run: |
echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
while true; do
# get pod status
READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}')
if [[ "$READY_STATUS" == "true" ]]; then
echo "✅ Pod [$LEADER_POD] is Ready!"
break
else
echo "Pod [$LEADER_POD] not ready, waiting..."
sleep 3
fi
done
- name: Stream logs and monitor pod health
run: |
set -euo pipefail
echo "🚀 Start streaming logs for Pod [$LEADER_POD] ..."
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" &
LOG_PID=$!
echo "Start monitoring Pod [$LEADER_POD] status ..."
while true; do
STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}')
if [[ "$STATUS" != "Running" && "$STATUS" != "Succeeded" ]]; then
echo "❌ Pod [$LEADER_POD] exited abnormally with status: $STATUS"
kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
kubectl logs "$LEADER_POD" -n "$NAMESPACE" --previous --all-containers || true
kill $LOG_PID || true
exit 1
fi
sleep 5
done &
MONITOR_PID=$!
wait $LOG_PID || true
kill $MONITOR_PID || true
- name: Generate summary
if: always()
run: |
if [ -f "/root/.cache/test_summary.md" ]; then
cat /root/.cache/test_summary.md >> "$GITHUB_STEP_SUMMARY"
else
echo "No summary file found." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Post process
if: always()
run: |
kubectl get pods -n $NAMESPACE
kubectl delete -f tests/e2e/multi_node/scripts/lws.yaml

View File

@@ -0,0 +1,206 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: 'ascend test / performance'
# This workflow runs nightly benchmarks for vllm-ascend.
on:
schedule:
# Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8)
- cron: "0 12 * * *"
- cron: "0 19 * * *"
workflow_dispatch:
# Allow manual triggering of the workflow
pull_request:
types: [ labeled ]
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only 1 job can runs on static-8-01-cards
concurrency:
group: static-8-01-cards
cancel-in-progress: false
jobs:
test:
if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
runs-on: 'linux-arm64-npu-static-8'
strategy:
matrix:
include:
- vllm_branch: v0.11.0
vllm_ascend_branch: main
vllm_use_v1: 1
max-parallel: 1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
volumes:
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
- /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
# Use self-host cache speed up pip and model download
- /home/action/.cache:/github/home/.cache/
options: >-
--device /dev/davinci0
--device /dev/davinci1
--device /dev/davinci_manager
--device /dev/devmm_svm
--device /dev/hisi_hdc
env:
VLLM_USE_MODELSCOPE: True
ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
# keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
- name: Install system dependencies
run: |
apt-get update -y
apt-get -y install git jq wget curl lsof gcc g++ cmake libnuma-dev
- name: Config git
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty
ref: ${{ matrix.vllm_branch }}
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
run: |
pip install -e .
pip install -r benchmarks/requirements-bench.txt
- name: Run current commit benchmarks
if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
run: |
# Sometimes we only want to run benchmarks on the current commit
# This is useful for debugging or a release benchmark
bash benchmarks/scripts/run-performance-benchmarks.sh
# Convert the benchmark results to markdown format
python3 benchmarks/scripts/convert_json_to_markdown.py
- name: Generate step summary
if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
run: |
cat ./benchmarks/results/benchmark_results.md >> $GITHUB_STEP_SUMMARY
- name: Upload benchmark artifacts
if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
uses: actions/upload-artifact@v4
with:
name: "benchmark-performance-${{ matrix.vllm_branch }}-${{ matrix.vllm_ascend_branch }}-report"
path: ./benchmarks/results/benchmark_results.md
if-no-files-found: warn
retention-days: 90
overwrite: true
- name: Install elastic_tool
if: github.event_name != 'pull_request'
run: |
pip install escli-tool==0.2.3
- name: Collect pr info from vllm-project/vllm-ascend
if: github.event_name != 'pull_request'
run: |
# Only get the pull request which may influences performance
git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' ':!benchmarks/*' > commit_log.txt
escli check commit_log.txt
- name: Prepare benchmark script in advance
if: github.event_name != 'pull_request'
# This is for the benchmark iteration, which will change the benchmark scripts while checkouting each commit.
# We need ensure the benchmark scripts always available.
run: |
# Prepare the benchmark script in advance
mkdir -p /github/home/benchmarks
cp -r benchmarks/* /github/home/benchmarks/
- name: Run benchmark iteration
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
if: github.event_name != 'pull_request'
run: |
while IFS= read -r line || [[ -n "$line" ]]; do
commit_id=${line%% *}
commit_title=${line#* }
git checkout $commit_id
commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict)
commit_time_no_tz=${commit_time::19}
pip install -e .
echo "------------------------"
echo "commit_id: $commit_id"
echo "commit_title: $commit_title"
echo "commit_time: $commit_time_no_tz"
echo "vllm branch: ${{ matrix.vllm_branch }}"
echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
echo "------------------------"
cd /github/home
ERROR_MSG=""
if ! bash benchmarks/scripts/run-performance-benchmarks.sh; then
ERROR_MSG="Benchmark failed to run"
fi
# send the result to es
escli add --vllm_branch ${{ matrix.vllm_branch }} \
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
--commit_id $commit_id \
--commit_title "$commit_title" \
--created_at "$commit_time_no_tz" \
--res_dir ./benchmarks/results \
--error "$ERROR_MSG" \
--extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
rm -rf ./benchmarks/results
cd -
done < commit_log.txt

View File

@@ -0,0 +1,43 @@
name: pre-commit
on:
workflow_call:
inputs:
vllm:
required: true
type: string
permissions:
contents: read
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: "3.11"
- run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
- run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty
ref: ${{ inputs.vllm }}
- name: Install vllm
working-directory: vllm-empty
run: |
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=empty pip install .
- name: Install vllm-ascend dev
run: |
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
env:
SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
with:
extra_args: --all-files --hook-stage manual

View File

@@ -0,0 +1,75 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: build / sdist
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/release_code.yml'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
push:
tags:
- 'v*'
jobs:
build:
name: release code
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
- name: Print
run: |
lscpu
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python3 -m pip install twine setuptools_scm
- name: Generate tar.gz
run: |
python3 setup.py sdist
ls dist
- name: Archive tar.gz
uses: actions/upload-artifact@v4
with:
name: vllm-ascend-src
path: dist/*
- name: Release
if: startsWith(github.ref, 'refs/tags/')
run: |
python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}

View File

@@ -0,0 +1,125 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: build / wheel
on:
schedule:
# Runs at 23:00 UTC (7:00 AM Beijing) every day
- cron: '0 23 * * *'
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/release_whl.yml'
- '.github/Dockerfile.buildwheel'
- 'vllm_ascend/**'
- 'setup.py'
- 'pyproject.toml'
- 'requirements.txt'
- 'cmake/**'
- 'CMakeLists.txt'
- 'csrc/**'
push:
tags:
- 'v*'
jobs:
build:
name: build and release wheel
strategy:
matrix:
os: [ubuntu-24.04, ubuntu-24.04-arm]
# PR only trigger latest version
python-version: ${{ fromJSON(
(github.event_name == 'pull_request' && '["3.11"]') ||
'["3.9", "3.10", "3.11"]'
) }}
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
- name: Print
run: |
lscpu
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Build wheel
run: |
ls
docker build -f ./.github/Dockerfile.buildwheel \
--build-arg PY_VERSION=${{ matrix.python-version }} \
-t wheel:v1 .
docker run --rm \
-u $(id -u):$(id -g) \
-v $(pwd):/outpwd \
wheel:v1 \
bash -c "cp -r /workspace/vllm-ascend/dist /outpwd"
ls dist
- name: Set up Python ${{ matrix.python-version }}
if: startsWith(github.ref, 'refs/tags/')
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: ${{ matrix.python-version }}
- name: Repair wheels with auditwheel
run: |
python3 -m pip install auditwheel
python3 -m pip install patchelf
mkdir -p dist/repaired
for whl in dist/*.whl; do
auditwheel repair "$whl" -w dist/repaired/ \
--exclude libplatform.so \
--exclude libregister.so \
--exclude libge_common_base.so \
--exclude libc10.so \
--exclude libc_sec.so \
--exclude "libascend*.so" \
--exclude "libtorch*.so" \
--exclude "liberror_manager.so"
done
rm -f dist/*.whl
mv dist/repaired/*.whl dist/
rmdir dist/repaired
ls dist
- name: Verify automatic platform tags
run: |
cd dist
for wheel in *.whl; do
echo "verification file: $wheel"
auditwheel show "$wheel"
done
- name: Archive wheel
uses: actions/upload-artifact@v4
with:
name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
path: dist/*
- name: Release
if: startsWith(github.ref, 'refs/tags/')
run: |
python3 -m pip install twine
python3 -m twine upload --verbose dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}

View File

@@ -0,0 +1,26 @@
name: PR Reminder Comment Bot
permissions:
pull-requests: write
on:
pull_request_target:
types: [opened]
jobs:
pr_reminder:
runs-on: ubuntu-latest
steps:
- name: Remind to run full CI on PR
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: '👋 Hi! Thank you for contributing to the vLLM Ascend project. The following points will speed up your PR merge:\n\n' +
'- A PR should do only one thing, smaller PRs enable faster reviews.\n' +
'- Every PR should include unit tests and end-to-end tests to ensure it works and is not broken by other future PRs.\n' +
'- Write the commit message by fulfilling the PR description to help reviewer and future developers understand.\n\n' +
'If CI fails, you can run linting and testing checks locally according [Contributing](https://vllm-ascend.readthedocs.io/zh-cn/latest/developer_guide/contribution/index.html) and [Testing](https://vllm-ascend.readthedocs.io/zh-cn/latest/developer_guide/contribution/testing.html).'
})
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -0,0 +1,100 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'e2e test / a3-test'
on:
workflow_call:
pull_request:
types: [ labeled ]
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
# and ignore the lint / 8 cards test type
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
e2e:
# only trigger e2e test after lint passed and the change is e2e related with pull request.
if: ${{ contains(github.event.pull_request.labels.*.name, 'dist-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'workflow_dispatch' }}
strategy:
matrix:
os: [linux-aarch64-a3-8]
vllm_version: [v0.11.0]
name: vLLM Ascend test
runs-on: ${{ matrix.os }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
env:
DEBIAN_FRONTEND: noninteractive
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
apt-get update -y
apt install git -y
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_version }}
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
run: |
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test for V1 Engine
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
# TODO: enable more tests
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe

View File

@@ -0,0 +1,87 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'ascend test / doctest'
on:
workflow_dispatch:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
# If we are changing the doctest we should do a PR test
- '.github/workflows/vllm_ascend_doctest.yaml'
- 'tests/e2e/doctests/**'
- 'tests/e2e/common.sh'
- 'tests/e2e/run_doctests.sh'
schedule:
# Runs every 12 hours
- cron: '0 */12 * * *'
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
jobs:
test:
strategy:
# Each version should be tested
fail-fast: false
matrix:
vllm_verison: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
name: vLLM Ascend test
runs-on: linux-aarch64-a2-1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_verison }}
steps:
- name: Check NPU/CANN and git info
run: |
echo "====> Print NPU/CANN info"
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
echo "====> Print vllm-ascend git info"
cd /vllm-workspace/vllm-ascend
git --no-pager log -1 || true
echo "====> Print vllm git info"
cd /vllm-workspace/vllm
git --no-pager log -1 || true
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Run vllm-ascend/tests/e2e/run_doctests.sh
run: |
# PWD: /__w/vllm-ascend/vllm-ascend
# Make sure e2e tests are latest
echo "Replacing /vllm-workspace/vllm-ascend/tests/e2e ..."
rm -rf /vllm-workspace/vllm-ascend/tests/e2e
mkdir -p /vllm-workspace/vllm-ascend/tests
# Overwrite e2e and examples
cp -r tests/e2e /vllm-workspace/vllm-ascend/tests/
cp -r examples /vllm-workspace/vllm-ascend/
# Simulate container to enter directory
cd /workspace
# Run real test
echo "Test:"
/vllm-workspace/vllm-ascend/tests/e2e/run_doctests.sh

View File

@@ -0,0 +1,149 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'ascend test'
on:
push:
branches:
- 'main'
pull_request:
branches:
- 'main'
- '*-dev'
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
lint:
uses: ./.github/workflows/pre-commit.yml
with:
vllm: v0.11.0
changes:
runs-on: ubuntu-latest
outputs:
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
steps:
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
e2e_tracker:
- '.github/workflows/vllm_ascend_test.yaml'
- 'vllm_ascend/**'
- 'csrc/**'
- 'cmake/**'
- 'tests/e2e/**'
- 'CMakeLists.txt'
- 'setup.py'
- 'requirements.txt'
- 'requirements-dev.txt'
- 'requirements-lint.txt'
- 'packages.txt'
ut_tracker:
- 'tests/ut/**'
ut:
needs: [lint, changes]
name: unit test
# only trigger unit test after lint passed and the change is e2e and ut related.
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
runs-on: ubuntu-22.04-arm
container:
image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
strategy:
matrix:
vllm_version: [v0.11.0]
steps:
- name: Install packages
run: |
apt-get update -y
apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_version }}
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty python3 -m pip install .
python3 -m pip uninstall -y triton
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install vllm-project/vllm-ascend
run: |
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
python3 -m pip install -r requirements-dev.txt
python3 -m pip install -v .
- name: Run unit test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
run: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
--ignore tests/ut/attention/test_attention_v1.py
- name: Upload coverage to Codecov
# only upload coverage when commits merged
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: unittests
name: vllm-ascend
verbose: true
e2e-light:
name: e2e-light
strategy:
matrix:
vllm_version: [v0.11.0]
# Note (yikun): If CI resource are limited we can split job into two chain jobs
needs: [lint, changes]
# only trigger e2e test after lint passed and the change is e2e related with pull request.
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
uses: ./.github/workflows/_e2e_test.yaml
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: light

View File

@@ -0,0 +1,117 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'e2e test / 310p-test'
on:
push:
tags:
- 'v*'
schedule:
# Runs every 6 hours
- cron: '0 */6 * * *'
pull_request:
types: [ labeled ]
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
e2e:
# e2e-310p-test will be triggered when tag 'e2e-310p-test' & 'ready-for-test' or schedule job
if: >-
${{
(contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) &&
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
github.event_name == 'schedule' || github.event_name == 'push'
}}
strategy:
max-parallel: 2
matrix:
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
vllm_version: [v0.11.0]
name: 310p e2e test
runs-on: ${{ matrix.os }}
container:
# TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
apt-get update -y
apt install git -y
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_version }}
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
run: |
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
export SOC_VERSION=ASCEND310P3
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run e2e test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
run: |
if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
pytest -sv tests/e2e/310p/test_offline_inference_310p.py
else
pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
fi

View File

@@ -0,0 +1,80 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'ascend test / full'
on:
pull_request:
branches:
- 'main'
- '*-dev'
types: [ labeled, synchronize ]
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
changes:
runs-on: ubuntu-latest
if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
outputs:
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
steps:
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
e2e_tracker:
- '.github/workflows/vllm_ascend_test.yaml'
- '.github/workflows/_e2e_test.yaml'
- 'vllm_ascend/**'
- 'csrc/**'
- 'cmake/**'
- 'tests/e2e/**'
- 'CMakeLists.txt'
- 'setup.py'
- 'requirements.txt'
- 'requirements-dev.txt'
- 'requirements-lint.txt'
- 'packages.txt'
ut_tracker:
- 'tests/ut/**'
e2e-test:
name: e2e-full
strategy:
matrix:
vllm_version: [v0.11.0]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: full

View File

@@ -0,0 +1,45 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'ascend test / vllm main'
on:
# Run 1-card and 2-cards e2e tests per 2h
schedule:
- cron: '0 */2 * * *'
workflow_dispatch:
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
e2e-test:
uses: ./.github/workflows/_e2e_test.yaml
with:
vllm: main
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: full

View File

@@ -0,0 +1,177 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# This test will be triggered:
# 1. schedule
# 2. pull_request change the related files
# 3. workflow_dispatch with models input
name: ascend test / models
on:
schedule:
# Runs every 6 hours
- cron: '0 */6 * * *'
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '.github/workflows/vllm_ascend_test_models.yaml'
- 'tests/e2e/models/test_lm_eval_correctness.py'
workflow_dispatch:
inputs:
vllm-ascend-version:
description: 'vllm-ascend:'
required: true
type: choice
# Current supported vLLM versions
options:
- latest
- main
default: main
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
run:
strategy:
matrix:
include:
- model_name: Qwen3-8B
runner: a2-1
- model_name: Qwen2.5-VL-7B-Instruct
runner: a2-1
- model_name: Qwen2-Audio-7B-Instruct
runner: a2-1
- model_name: Qwen3-30B-A3B
runner: a2-2
- model_name: Qwen3-VL-30B-A3B-Instruct
runner: a2-2
- model_name: DeepSeek-V2-Lite
runner: a2-2
fail-fast: false
uses: ./.github/workflows/_accuracy_test.yaml
with:
vllm: v0.11.0
runner: linux-aarch64-${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
model_name: ${{ matrix.model_name }}
upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
create_pr:
runs-on: ubuntu-latest
needs: run
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
env:
UPSTREAM_REPO: vllm-project/vllm-ascend
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
repository: vllm-ascend-ci/vllm-ascend
token: ${{ secrets.PAT_TOKEN }}
ref: main
- name: Add upstream remote
run: |
git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
git fetch upstream
git remote -v
- name: Set Git user info dynamically
run: |
git config user.name "${{ github.actor }}"
git config user.email "${{ github.actor }}@users.noreply.github.com"
- name: Create or switch to branch
run: |
TIMESTAMP=$(date +%Y%m%d%H%M%S)
BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
git checkout -B "${BRANCH_NAME}" upstream/main
- name: Download only current run reports
uses: actions/download-artifact@v5
with:
path: ./docs/source/developer_guide/evaluation/accuracy_report
pattern: report-*
github-token: ${{ secrets.GITHUB_TOKEN }}
run-id: ${{ github.run_id }}
- name: Delete old report
run: |
find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
- name: Update accuracy_report/index.md
run: |
REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
INDEX_MD="$REPORT_DIR/index.md"
{
echo "# Accuracy Report"
echo ""
echo ":::{toctree}"
echo ":caption: Accuracy Report"
echo ":maxdepth: 1"
for report in "$REPORT_DIR"/*.md; do
filename="$(basename "$report" .md)"
if [ "$filename" != "index" ]; then
echo "$filename"
fi
done
echo ":::"
} > "$INDEX_MD"
- name: push accuracy report
env:
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
run: |
git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
git commit -s -m "[Doc] Update accuracy reports for ${{ env.BRANCH_NAME }}"
git push -f origin "${{ env.BRANCH_NAME }}"
- name: Create PR in upstream via API
uses: actions/github-script@v8
with:
github-token: ${{ secrets.PAT_TOKEN }}
script: |
const pr = await github.rest.pulls.create({
owner: 'vllm-project',
repo: 'vllm-ascend',
head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
base: 'main',
title: `[Doc] Update accuracy reports for ${{ env.BRANCH_NAME }}`,
body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models
- [Workflow run][1]
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
});
core.info(`Created PR #${pr.data.number}`);

View File

@@ -0,0 +1,112 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: 'e2e test / pd-disaggregation'
on:
schedule:
# Runs at 23:00 UTC (7:00 AM Beijing) every day
- cron: '0 23 * * *'
pull_request:
types: [ labeled ]
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only 1 job can runs on static-8-01-cards
concurrency:
group: static-8-01-cards
cancel-in-progress: false
jobs:
prefilling-decoding-disaggregation:
# pd-test will be triggered when tag 'pd-test' & 'ready-for-test' or schedule job
if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
strategy:
matrix:
vllm_verison: [
main,
v0.9.1
]
name: vLLM Ascend prefilling decoding disaggregation test
runs-on: linux-arm64-npu-static-8
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
volumes:
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
- /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
# Use self-host cache speed up pip and model download
- /home/action/.cache:/github/home/.cache/
options: >-
--device /dev/davinci0
--device /dev/davinci1
--device /dev/davinci_manager
--device /dev/devmm_svm
--device /dev/hisi_hdc
env:
VLLM_USE_MODELSCOPE: True
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
# keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
apt-get update -y
apt install git -y
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_verison }}
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
run: |
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend PD Disaggregation edge test
run: |
git config --global --add safe.directory/__w/vllm-ascend/vllm-ascend
bash tests/e2e/pd_disaggreate/run_edge_case_test.sh