vllm-ascend vnpu v1
This commit is contained in:
175
.github.backup/workflows/_accuracy_test.yaml
Normal file
175
.github.backup/workflows/_accuracy_test.yaml
Normal file
@@ -0,0 +1,175 @@
|
||||
name: 'accuracy test'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
vllm:
|
||||
required: true
|
||||
type: string
|
||||
vllm-ascend:
|
||||
required: false
|
||||
type: string
|
||||
default: main
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
image:
|
||||
required: true
|
||||
type: string
|
||||
model_name:
|
||||
required: true
|
||||
type: string
|
||||
upload:
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
accuracy_tests:
|
||||
|
||||
runs-on: ${{ inputs.runner }}
|
||||
name: ${{ inputs.model_name }} accuracy
|
||||
container:
  # Run inside the image chosen by the caller: `image` is a required input of
  # this reusable workflow, so it must not be shadowed by a hard-coded tag.
  image: ${{ inputs.image }}
  env:
    VLLM_USE_MODELSCOPE: True
    # 1. If version specified (workflow_dispatch), do specified branch accuracy test
    # 2. If no version (labeled PR), do accuracy test by default ref:
    # The branch, tag or SHA to checkout. When checking out the repository that
    # triggered a workflow, this defaults to the reference or SHA for that event.
    # Otherwise, uses the default branch.
    GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# Expose the model name as a step output (`steps.set_output.outputs.model_name`).
- name: Set model name as output
  id: set_output
  env:
    # Hand the input to the script through the environment instead of
    # expanding it into the script text, so its content is never parsed as
    # shell code.
    GHA_MODEL_NAME: ${{ inputs.model_name }}
  run: |
    echo "model_name=${GHA_MODEL_NAME}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Config mirrors
|
||||
run: |
|
||||
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
||||
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
||||
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
||||
apt-get update -y
|
||||
apt install git -y
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get -y install `cat packages.txt`
|
||||
apt-get -y install gcc g++ cmake libnuma-dev
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
ref: ${{ inputs.vllm }}
|
||||
path: ./vllm-empty
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: ./vllm-empty
|
||||
run: |
|
||||
VLLM_TARGET_DEVICE=empty pip install -e .
|
||||
|
||||
# Turn the `vllm-ascend` input into a concrete ref and export it as
# GHA_VLLM_ASCEND_VERSION for the following steps:
#   - "latest"       -> newest `v*` tag of vllm-project/vllm-ascend
#                       (falls back to "main" when no tag is found)
#   - anything else  -> used verbatim
- name: Resolve vllm-ascend version
  env:
    # Passed via the environment rather than expanded into the script text,
    # so the input value is never parsed as shell code.
    VERSION_INPUT: ${{ inputs.vllm-ascend }}
  run: |
    if [[ "$VERSION_INPUT" == "latest" ]]; then
      # --refs drops peeled "<tag>^{}" entries that ls-remote prints for
      # annotated tags; such an entry is not a usable checkout ref.
      TAGS=$(git ls-remote --refs --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
      LATEST_TAG=$(echo "$TAGS" | head -n1)
      if [[ -z "$LATEST_TAG" ]]; then
        RESOLVED_VERSION="main"
      else
        RESOLVED_VERSION="$LATEST_TAG"
      fi
    else
      RESOLVED_VERSION="$VERSION_INPUT"
    fi
    echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Checkout vllm-project/vllm-ascend repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm-ascend
|
||||
path: ./vllm-ascend
|
||||
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
|
||||
|
||||
- name: Install vllm-project/vllm-ascend
|
||||
working-directory: ./vllm-ascend
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||
run: |
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Get vLLM commit hash and URL
|
||||
working-directory: ./vllm-empty
|
||||
run: |
|
||||
VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
|
||||
echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
|
||||
|
||||
- name: Get vLLM-Ascend commit hash and URL
|
||||
working-directory: ./vllm-ascend
|
||||
run: |
|
||||
VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
|
||||
echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
|
||||
|
||||
- name: Collect version info
|
||||
run: |
|
||||
for dir in /usr/local/Ascend/ascend-toolkit/*; do
|
||||
dname=$(basename "$dir")
|
||||
if [ "$dname" != "latest" ]; then
|
||||
TOOLKIT_DIR="$dname"
|
||||
break
|
||||
fi
|
||||
done
|
||||
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
|
||||
GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
|
||||
| head -n1 \
|
||||
| cut -d'=' -f2 \
|
||||
| tr -d '"')
|
||||
{
|
||||
echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
|
||||
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
|
||||
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
|
||||
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
# Run the lm-eval correctness test for the requested model and publish the
# report file stem as the `markdown_name` step output, which the summary and
# upload steps read.
- name: Run accuracy test
  id: report
  env:
    VLLM_WORKER_MULTIPROC_METHOD: spawn
    VLLM_USE_MODELSCOPE: True
    VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
    VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
    VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
    VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
    CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
    TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
    TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
    # Passed via the environment rather than expanded into the script text,
    # so the model name is never parsed as shell code or word-split.
    GHA_MODEL_NAME: ${{ inputs.model_name }}
  run: |
    model_base_name=$(basename "$GHA_MODEL_NAME")
    markdown_name="${model_base_name}"
    echo "markdown_name=$markdown_name" >> "$GITHUB_OUTPUT"
    mkdir -p ./benchmarks/accuracy
    pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
      --config "./tests/e2e/models/configs/${GHA_MODEL_NAME}.yaml"
|
||||
|
||||
# Append the accuracy report to the job summary. This step runs even when an
# earlier step failed, so the report may not exist; in that case a short note
# is written instead of failing a second time on a missing file.
- name: Generate step summary
  if: ${{ always() }}
  env:
    REPORT_NAME: ${{ steps.report.outputs.markdown_name }}
  run: |
    report="./benchmarks/accuracy/${REPORT_NAME}.md"
    if [[ -n "$REPORT_NAME" && -f "$report" ]]; then
      cat "$report" >> "$GITHUB_STEP_SUMMARY"
    else
      echo "Accuracy report not found: ${report}" >> "$GITHUB_STEP_SUMMARY"
    fi
|
||||
|
||||
- name: Upload Report
|
||||
if: ${{ inputs.upload == true }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
|
||||
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
|
||||
if-no-files-found: warn
|
||||
retention-days: 90
|
||||
overwrite: true
|
||||
199
.github.backup/workflows/_e2e_test.yaml
Normal file
199
.github.backup/workflows/_e2e_test.yaml
Normal file
@@ -0,0 +1,199 @@
|
||||
name: 'e2e test'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
vllm:
|
||||
required: true
|
||||
type: string
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
image:
|
||||
required: true
|
||||
type: string
|
||||
type:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
e2e:
|
||||
name: singlecard
|
||||
runs-on: ${{ inputs.runner }}-1
|
||||
container:
|
||||
image: ${{ inputs.image }}
|
||||
env:
|
||||
VLLM_LOGGING_LEVEL: ERROR
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
steps:
|
||||
- name: Check npu and CANN info
|
||||
run: |
|
||||
npu-smi info
|
||||
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
||||
|
||||
- name: Config mirrors
|
||||
run: |
|
||||
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
||||
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
||||
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
||||
apt-get update -y
|
||||
apt install git -y
|
||||
|
||||
- name: Checkout vllm-project/vllm-ascend repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get -y install `cat packages.txt`
|
||||
apt-get -y install gcc g++ cmake libnuma-dev
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
ref: ${{ inputs.vllm }}
|
||||
path: ./vllm-empty
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: ./vllm-empty
|
||||
run: |
|
||||
VLLM_TARGET_DEVICE=empty pip install -e .
|
||||
|
||||
- name: Install vllm-project/vllm-ascend
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||
run: |
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
if: ${{ inputs.type == 'light' }}
|
||||
run: |
|
||||
pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
||||
pytest -sv tests/e2e/singlecard/test_quantization.py
|
||||
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
||||
|
||||
- name: Run e2e test
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
if: ${{ inputs.type == 'full' }}
|
||||
run: |
|
||||
# We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
|
||||
# the test separately.
|
||||
|
||||
pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
||||
pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
|
||||
pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
|
||||
pytest -sv tests/e2e/singlecard/test_bge_model.py
|
||||
pytest -sv tests/e2e/singlecard/test_camem.py
|
||||
pytest -sv tests/e2e/singlecard/test_chunked.py
|
||||
pytest -sv tests/e2e/singlecard/test_embedding.py
|
||||
pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
|
||||
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
||||
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
||||
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
|
||||
pytest -sv tests/e2e/singlecard/test_quantization.py
|
||||
pytest -sv tests/e2e/singlecard/test_sampler.py
|
||||
pytest -sv tests/e2e/singlecard/test_vlm.py
|
||||
|
||||
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
|
||||
# Fix me: test_eagle_correctness OOM error
|
||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
||||
|
||||
pytest -sv tests/e2e/singlecard/ops/
|
||||
|
||||
e2e-2-cards:
|
||||
name: multicard
|
||||
runs-on: ${{ inputs.runner }}-2
|
||||
container:
|
||||
image: ${{ inputs.image }}
|
||||
env:
|
||||
VLLM_LOGGING_LEVEL: ERROR
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
steps:
|
||||
- name: Check npu and CANN info
|
||||
run: |
|
||||
npu-smi info
|
||||
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
||||
|
||||
- name: Config mirrors
|
||||
run: |
|
||||
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
||||
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
||||
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
||||
apt-get update -y
|
||||
apt install git -y
|
||||
|
||||
- name: Checkout vllm-project/vllm-ascend repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get -y install `cat packages.txt`
|
||||
apt-get -y install gcc g++ cmake libnuma-dev
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
ref: ${{ inputs.vllm }}
|
||||
path: ./vllm-empty
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: ./vllm-empty
|
||||
run: |
|
||||
VLLM_TARGET_DEVICE=empty pip install -e .
|
||||
|
||||
- name: Install vllm-project/vllm-ascend
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||
run: |
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test (light)
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
if: ${{ inputs.type == 'light' }}
|
||||
run: |
|
||||
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test (full)
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
if: ${{ inputs.type == 'full' }}
|
||||
run: |
|
||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||
pytest -sv tests/e2e/multicard/test_expert_parallel.py
|
||||
pytest -sv tests/e2e/multicard/test_external_launcher.py
|
||||
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
|
||||
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
|
||||
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
|
||||
|
||||
# To avoid oom, we need to run the test in a single process.
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_new_version
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_old_version
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
|
||||
|
||||
pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
|
||||
pytest -sv tests/e2e/multicard/test_prefix_caching.py
|
||||
pytest -sv tests/e2e/multicard/test_qwen3_moe.py
|
||||
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
|
||||
72
.github.backup/workflows/accuracy_test.yaml
Normal file
72
.github.backup/workflows/accuracy_test.yaml
Normal file
@@ -0,0 +1,72 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
# This test will be triggered:
|
||||
# - PR labeled with: 'accuracy-test' & 'ready-for-test'
|
||||
name: ascend test / accuracy
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
types: [ labeled, synchronize ]
|
||||
|
||||
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
||||
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
||||
# It's used to activate ascend-toolkit environment variables.
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -el {0}
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
run:
|
||||
name: ""
|
||||
strategy:
|
||||
matrix:
|
||||
# Only top series models should be listed in here
|
||||
include:
|
||||
- runner: a2-1
|
||||
model_name: Qwen3-8B
|
||||
- runner: a2-1
|
||||
model_name: Qwen2.5-VL-7B-Instruct
|
||||
- runner: a2-1
|
||||
model_name: Qwen2-Audio-7B-Instruct
|
||||
- runner: a2-2
|
||||
model_name: Qwen3-30B-A3B
|
||||
- runner: a2-2
|
||||
model_name: Qwen3-VL-30B-A3B-Instruct
|
||||
- runner: a2-2
|
||||
model_name: DeepSeek-V2-Lite
|
||||
fail-fast: false
|
||||
# test will be triggered when the PR is labeled with both 'accuracy-test' & 'ready-for-test'
|
||||
if: >-
|
||||
${{
|
||||
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
|
||||
contains(github.event.pull_request.labels.*.name, 'ready-for-test')
|
||||
}}
|
||||
uses: ./.github/workflows/_accuracy_test.yaml
|
||||
with:
|
||||
vllm: v0.11.0
|
||||
runner: linux-aarch64-${{ matrix.runner }}
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
|
||||
model_name: ${{ matrix.model_name }}
|
||||
57
.github.backup/workflows/format_pr_body.yaml
Normal file
57
.github.backup/workflows/format_pr_body.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: format / pr body
|
||||
|
||||
on:
|
||||
# The PR body is updated when the PR is opened and when new commits are pushed
|
||||
pull_request_target:
|
||||
types: [opened, synchronize]
|
||||
branches:
|
||||
- 'main'
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
update-description:
|
||||
name: update vLLM version
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
|
||||
- name: Get vLLM version
|
||||
run: |
|
||||
VLLM_COMMIT=v0.11.0
|
||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
||||
|
||||
- name: Get vLLM release version
|
||||
run: |
|
||||
VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
|
||||
echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
|
||||
|
||||
- name: Update PR description
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"
|
||||
135
.github.backup/workflows/image_310p_openeuler.yml
Normal file
135
.github.backup/workflows/image_310p_openeuler.yml
Normal file
@@ -0,0 +1,135 @@
|
||||
name: 'image / openEuler / 310p'
|
||||
# This is a docker build check and publish job:
|
||||
# 1. PR Triggered docker image build check
|
||||
# - is for image build check
|
||||
# - Enable on main/*-dev branch
|
||||
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
||||
# 2. branches push trigger image publish
|
||||
# - is for branch/dev/nightly image
|
||||
# - commits are merged into main/*-dev ==> vllm-ascend:main-310p-openeuler / vllm-ascend:*-dev-310p-openeuler
|
||||
# 3. tags push trigger image publish
|
||||
# - is for final release image
|
||||
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p-openeuler / vllm-ascend:v1.2.3rc1-310p-openeuler
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- '.github/workflows/image_310p_openeuler.yml'
|
||||
- 'Dockerfile.310p.openEuler'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
types: [ labeled ]
|
||||
push:
|
||||
# Publish image when tagging; the Dockerfile at the tag will be built as the tag image
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
tags:
|
||||
- 'v*'
|
||||
paths:
|
||||
- '.github/workflows/image_310p_openeuler.yml'
|
||||
- 'Dockerfile.310p.openEuler'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: vllm-ascend image build
|
||||
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
||||
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
||||
runs-on: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'ubuntu-latest' ||
|
||||
'ubuntu-24.04-arm'
|
||||
}}
|
||||
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Print
|
||||
run: |
|
||||
lscpu
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# TODO(yikun): add more hub image and a note on release policy for container image
|
||||
images: |
|
||||
quay.io/ascend/vllm-ascend
|
||||
# Note for test case
|
||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||
# 1. branch job publishes per main/*-dev branch commit
|
||||
# 2. main and dev pull_request is build only, so the tag pr-N-310p-openeuler is fine
|
||||
# 3. only pep440 matched tag will be published:
|
||||
# - v0.7.1 --> v0.7.1-310p-openeuler
|
||||
# - pre/post/dev: v0.7.1rc1-310p-openeuler/v0.7.1rc1-310p-openeuler/v0.7.1rc1.dev1-310p-openeuler/v0.7.1.post1-310p-openeuler, no latest
|
||||
# which follow the rule from vLLM with prefix v
|
||||
# TODO(yikun): the post release might be considered as latest release
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-310p-openeuler
|
||||
type=ref,event=pr,suffix=-310p-openeuler
|
||||
type=pep440,pattern={{raw}},suffix=-310p-openeuler
|
||||
flavor:
|
||||
latest=false
|
||||
|
||||
- name: Free up disk space
|
||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
||||
with:
|
||||
tool-cache: true
|
||||
docker-images: false
|
||||
|
||||
- name: Build - Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build - Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Publish - Login to Quay Container Registry
|
||||
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
|
||||
- name: Build and push 310p
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'linux/amd64,linux/arm64' ||
|
||||
'linux/arm64'
|
||||
}}
|
||||
# use the current repo path as the build context, ensure .git is contained
|
||||
context: .
|
||||
# only trigger when tag, branch/main push
|
||||
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
file: Dockerfile.310p.openEuler
|
||||
build-args: |
|
||||
PIP_INDEX_URL=https://pypi.org/simple
|
||||
provenance: false
|
||||
131
.github.backup/workflows/image_310p_ubuntu.yml
Normal file
131
.github.backup/workflows/image_310p_ubuntu.yml
Normal file
@@ -0,0 +1,131 @@
|
||||
name: 'image / Ubuntu / 310p'
|
||||
# This is a docker build check and publish job:
|
||||
# 1. PR Triggered docker image build check
|
||||
# - is for image build check
|
||||
# - Enable on main/*-dev branch
|
||||
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
||||
# 2. branches push trigger image publish
|
||||
# - is for branch/dev/nightly image
|
||||
# - commits are merged into main/*-dev ==> vllm-ascend:main-310p / vllm-ascend:*-dev-310p
|
||||
# 3. tags push trigger image publish
|
||||
# - is for final release image
|
||||
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p / vllm-ascend:v1.2.3rc1-310p
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- '.github/workflows/image_310p_ubuntu.yml'
|
||||
- 'Dockerfile.310p'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
types: [ labeled ]
|
||||
push:
|
||||
# Publish image when tagging; the Dockerfile at the tag will be built as the tag image
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
tags:
|
||||
- 'v*'
|
||||
paths:
|
||||
- '.github/workflows/image_310p_ubuntu.yml'
|
||||
- 'Dockerfile.310p'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
name: vllm-ascend image build
|
||||
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
||||
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Print
|
||||
run: |
|
||||
lscpu
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# TODO(yikun): add more hub image and a note on release policy for container image
|
||||
images: |
|
||||
quay.io/ascend/vllm-ascend
|
||||
# Note for test case
|
||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||
# 1. branch job publishes per main/*-dev branch commit
|
||||
# 2. main and dev pull_request is build only, so the tag pr-N is fine
|
||||
# 3. only pep440 matched tag will be published:
|
||||
# - v0.7.1 --> v0.7.1-310p
|
||||
# - pre/post/dev: v0.7.1rc1-310p/v0.7.1rc1-310p/v0.7.1rc1.dev1-310p/v0.7.1.post1-310p, no latest
|
||||
# which follow the rule from vLLM with prefix v
|
||||
# TODO(yikun): the post release might be considered as latest release
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-310p
|
||||
type=ref,event=pr,suffix=-310p
|
||||
type=pep440,pattern={{raw}},suffix=-310p
|
||||
flavor:
|
||||
latest=false
|
||||
|
||||
- name: Free up disk space
|
||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
||||
with:
|
||||
tool-cache: true
|
||||
docker-images: false
|
||||
|
||||
- name: Build - Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build - Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Publish - Login to Quay Container Registry
|
||||
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
|
||||
- name: Build and push 310p
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'linux/amd64,linux/arm64' ||
|
||||
'linux/amd64'
|
||||
}}
|
||||
# use the current repo path as the build context, ensure .git is contained
|
||||
context: .
|
||||
file: Dockerfile.310p
|
||||
# only trigger when tag, branch/main push
|
||||
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
build-args: |
|
||||
PIP_INDEX_URL=https://pypi.org/simple
|
||||
provenance: false
|
||||
135
.github.backup/workflows/image_a3_openeuler.yml
Normal file
135
.github.backup/workflows/image_a3_openeuler.yml
Normal file
@@ -0,0 +1,135 @@
|
||||
name: 'image / openEuler / a3'
|
||||
# This is a docker build check and publish job:
|
||||
# 1. PR Triggered docker image build check
|
||||
# - is for image build check
|
||||
# - Enable on main/*-dev branch
|
||||
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
||||
# 2. branches push trigger image publish
|
||||
# - is for branch/dev/nightly image
|
||||
# - commits are merged into main/*-dev ==> vllm-ascend:main-a3-openeuler / vllm-ascend:*-dev-a3-openeuler
|
||||
# 3. tags push trigger image publish
|
||||
# - is for final release image
|
||||
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-a3-openeuler / vllm-ascend:v1.2.3rc1-a3-openeuler
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- '.github/workflows/image_a3_openeuler.yml'
|
||||
- 'Dockerfile.a3.openEuler'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
types: [ labeled ]
|
||||
push:
|
||||
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
tags:
|
||||
- 'v*'
|
||||
paths:
|
||||
- '.github/workflows/image_a3_openeuler.yml'
|
||||
- 'Dockerfile.a3.openEuler'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: vllm-ascend image build
|
||||
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
||||
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
||||
runs-on: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'ubuntu-latest' ||
|
||||
'ubuntu-24.04-arm'
|
||||
}}
|
||||
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Print
|
||||
run: |
|
||||
lscpu
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# TODO(yikun): add more hub image and a note on release policy for container image
|
||||
images: |
|
||||
quay.io/ascend/vllm-ascend
|
||||
# Note for test case
|
||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||
# 1. branch job publishes an image for each main/*-dev branch commit
|
||||
# 2. main and dev pull_request is build only, so the tag pr-N-a3-openeuler is fine
|
||||
# 3. only pep440 matched tag will be published:
|
||||
# - v0.7.1 --> v0.7.1-a3-openeuler
|
||||
# - pre/post/dev: v0.7.1rc1-a3-openeuler/v0.7.1rc1-a3-openeuler/v0.7.1rc1.dev1-a3-openeuler/v0.7.1.post1-a3-openeuler, no latest
|
||||
# which follow the rule from vLLM with prefix v
|
||||
# TODO(yikun): the post release might be considered as latest release
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-a3-openeuler
|
||||
type=ref,event=pr,suffix=-a3-openeuler
|
||||
type=pep440,pattern={{raw}},suffix=-a3-openeuler
|
||||
flavor:
|
||||
latest=false
|
||||
|
||||
- name: Free up disk space
|
||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
||||
with:
|
||||
tool-cache: true
|
||||
docker-images: false
|
||||
|
||||
- name: Build - Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build - Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Publish - Login to Quay Container Registry
|
||||
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
|
||||
- name: Build and push a3
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'linux/amd64,linux/arm64' ||
|
||||
'linux/arm64'
|
||||
}}
|
||||
# use the current repo path as the build context, ensure .git is contained
|
||||
context: .
|
||||
# only trigger when tag, branch/main push
|
||||
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
file: Dockerfile.a3.openEuler
|
||||
build-args: |
|
||||
PIP_INDEX_URL=https://pypi.org/simple
|
||||
provenance: false
|
||||
|
||||
131
.github.backup/workflows/image_a3_ubuntu.yml
Normal file
131
.github.backup/workflows/image_a3_ubuntu.yml
Normal file
@@ -0,0 +1,131 @@
|
||||
name: 'image / Ubuntu / a3'
|
||||
# This is a docker build check and publish job:
|
||||
# 1. PR Triggered docker image build check
|
||||
# - is for image build check
|
||||
# - Enable on main/*-dev branch
|
||||
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
||||
# 2. branches push trigger image publish
|
||||
# - is for branch/dev/nightly image
|
||||
# - commits are merged into main/*-dev ==> vllm-ascend:main-a3 / vllm-ascend:*-dev-a3
|
||||
# 3. tags push trigger image publish
|
||||
# - is for final release image
|
||||
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-a3|vllm-ascend:v1.2.3rc1-a3
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- '.github/workflows/image_a3_ubuntu.yml'
|
||||
- 'Dockerfile.a3'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
types: [ labeled ]
|
||||
push:
|
||||
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
tags:
|
||||
- 'v*'
|
||||
paths:
|
||||
- '.github/workflows/image_a3_ubuntu.yml'
|
||||
- 'Dockerfile.a3'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
name: vllm-ascend image build
|
||||
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
||||
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Print
|
||||
run: |
|
||||
lscpu
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# TODO(yikun): add more hub image and a note on release policy for container image
|
||||
images: |
|
||||
quay.io/ascend/vllm-ascend
|
||||
# Note for test case
|
||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||
# 1. branch job publishes an image for each main/*-dev branch commit
|
||||
# 2. main and dev pull_request is build only, so the tag pr-N-a3 is fine
|
||||
# 3. only pep440 matched tag will be published:
|
||||
# - v0.7.1 --> v0.7.1-a3
|
||||
# - pre/post/dev: v0.7.1rc1-a3/v0.7.1rc1-a3/v0.7.1rc1.dev1-a3/v0.7.1.post1-a3, no latest
|
||||
# which follow the rule from vLLM with prefix v
|
||||
# TODO(yikun): the post release might be considered as latest release
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-a3
|
||||
type=ref,event=pr,suffix=-a3
|
||||
type=pep440,pattern={{raw}},suffix=-a3
|
||||
flavor:
|
||||
latest=false
|
||||
|
||||
- name: Free up disk space
|
||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
||||
with:
|
||||
tool-cache: true
|
||||
docker-images: false
|
||||
|
||||
- name: Build - Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build - Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Publish - Login to Quay Container Registry
|
||||
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
|
||||
- name: Build and push a3
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'linux/amd64,linux/arm64' ||
|
||||
'linux/amd64'
|
||||
}}
|
||||
# use the current repo path as the build context, ensure .git is contained
|
||||
context: .
|
||||
file: Dockerfile.a3
|
||||
# only trigger when tag, branch/main push
|
||||
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
build-args: |
|
||||
PIP_INDEX_URL=https://pypi.org/simple
|
||||
provenance: false
|
||||
|
||||
134
.github.backup/workflows/image_openeuler.yml
Normal file
134
.github.backup/workflows/image_openeuler.yml
Normal file
@@ -0,0 +1,134 @@
|
||||
name: 'image / openEuler'
|
||||
# This is a docker build check and publish job:
|
||||
# 1. PR Triggered docker image build check
|
||||
# - is for image build check
|
||||
# - Enable on main/*-dev branch
|
||||
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
||||
# 2. branches push trigger image publish
|
||||
# - is for branch/dev/nightly image
|
||||
# - commits are merged into main/*-dev ==> vllm-ascend:main-openeuler / vllm-ascend:*-dev-openeuler
|
||||
# 3. tags push trigger image publish
# - is for final release image
|
||||
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler / vllm-ascend:v1.2.3rc1-openeuler
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- '.github/workflows/image_openeuler.yml'
|
||||
- 'Dockerfile.openEuler'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
types: [ labeled ]
|
||||
push:
|
||||
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
tags:
|
||||
- 'v*'
|
||||
paths:
|
||||
- '.github/workflows/image_openeuler.yml'
|
||||
- 'Dockerfile.openEuler'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: vllm-ascend image build
|
||||
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
||||
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
||||
runs-on: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'ubuntu-latest' ||
|
||||
'ubuntu-24.04-arm'
|
||||
}}
|
||||
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Print
|
||||
run: |
|
||||
lscpu
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# TODO(yikun): add more hub image and a note on release policy for container image
|
||||
images: |
|
||||
quay.io/ascend/vllm-ascend
|
||||
# Note for test case
|
||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||
# 1. branch job publishes an image for each main/*-dev branch commit
|
||||
# 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
|
||||
# 3. only pep440 matched tag will be published:
|
||||
# - v0.7.1 --> v0.7.1-openeuler
|
||||
# - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest
|
||||
# which follow the rule from vLLM with prefix v
|
||||
# TODO(yikun): the post release might be considered as latest release
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-openeuler
|
||||
type=ref,event=pr,suffix=-openeuler
|
||||
type=pep440,pattern={{raw}},suffix=-openeuler
|
||||
flavor:
|
||||
latest=true
|
||||
|
||||
- name: Free up disk space
|
||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
||||
with:
|
||||
tool-cache: true
|
||||
docker-images: false
|
||||
|
||||
- name: Build - Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build - Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Publish - Login to Quay Container Registry
|
||||
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
|
||||
- name: Build and push 910b
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'linux/amd64,linux/arm64' ||
|
||||
'linux/arm64'
|
||||
}}
|
||||
# use the current repo path as the build context, ensure .git is contained
|
||||
context: .
|
||||
# only trigger when tag, branch/main push
|
||||
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
file: Dockerfile.openEuler
|
||||
build-args: |
|
||||
PIP_INDEX_URL=https://pypi.org/simple
|
||||
provenance: false
|
||||
131
.github.backup/workflows/image_ubuntu.yml
Normal file
131
.github.backup/workflows/image_ubuntu.yml
Normal file
@@ -0,0 +1,131 @@
|
||||
name: 'image / Ubuntu'
|
||||
# This is a docker build check and publish job:
|
||||
# 1. PR Triggered docker image build check
|
||||
# - is for image build check
|
||||
# - Enable on main/*-dev branch
|
||||
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
||||
# 2. branches push trigger image publish
|
||||
# - is for branch/dev/nightly image
|
||||
# - commits are merged into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
|
||||
# 3. tags push trigger image publish
|
||||
# - is for final release image
|
||||
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
paths:
|
||||
- '.github/workflows/image_ubuntu.yml'
|
||||
- 'Dockerfile'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
types: [ labeled ]
|
||||
push:
|
||||
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
||||
branches:
|
||||
- 'main'
|
||||
- '*-dev'
|
||||
tags:
|
||||
- 'v*'
|
||||
paths:
|
||||
- '.github/workflows/image_ubuntu.yml'
|
||||
- 'Dockerfile'
|
||||
- 'vllm_ascend/**'
|
||||
- 'setup.py'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- 'cmake/**'
|
||||
- 'CMakeLists.txt'
|
||||
- 'csrc/**'
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
name: vllm-ascend image build
|
||||
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
||||
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Print
|
||||
run: |
|
||||
lscpu
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# TODO(yikun): add more hub image and a note on release policy for container image
|
||||
images: |
|
||||
quay.io/ascend/vllm-ascend
|
||||
# Note for test case
|
||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||
# 1. branch job publishes an image for each main/*-dev branch commit
|
||||
# 2. main and dev pull_request is build only, so the tag pr-N is fine
|
||||
# 3. only pep440 matched tag will be published:
|
||||
# - v0.7.1 --> v0.7.1, latest
|
||||
# - pre/post/dev: v0.7.1rc1/v0.7.1rc1/v0.7.1rc1.dev1/v0.7.1.post1, no latest
|
||||
# which follow the rule from vLLM with prefix v
|
||||
# TODO(yikun): the post release might be considered as latest release
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=ref,event=pr
|
||||
type=pep440,pattern={{raw}}
|
||||
flavor:
|
||||
latest=true
|
||||
|
||||
- name: Free up disk space
|
||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
||||
with:
|
||||
tool-cache: true
|
||||
docker-images: false
|
||||
|
||||
- name: Build - Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build - Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Publish - Login to Quay Container Registry
|
||||
if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
|
||||
- name: Build and push 910b
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: >-
|
||||
${{
|
||||
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
||||
'linux/amd64,linux/arm64' ||
|
||||
'linux/amd64'
|
||||
}}
|
||||
# use the current repo path as the build context, ensure .git is contained
|
||||
context: .
|
||||
file: Dockerfile
|
||||
# only trigger when tag, branch/main push
|
||||
push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
build-args: |
|
||||
PIP_INDEX_URL=https://pypi.org/simple
|
||||
provenance: false
|
||||
20
.github.backup/workflows/label_merge_conflict.yml
Normal file
20
.github.backup/workflows/label_merge_conflict.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
name: "Merge Conflict Labeler"
|
||||
on:
|
||||
# So that PRs touching the same files as the push are updated
|
||||
push:
|
||||
# So that the `dirtyLabel` is removed if conflicts are resolved
|
||||
# We recommend `pull_request_target` so that github secrets are available.
|
||||
# In `pull_request` we wouldn't be able to change labels of fork PRs
|
||||
pull_request_target:
|
||||
types: [synchronize]
|
||||
|
||||
jobs:
|
||||
main:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check if prs are dirty
|
||||
uses: eps1lon/actions-label-merge-conflict@v3
|
||||
with:
|
||||
dirtyLabel: "merge-conflicts"
|
||||
repoToken: "${{ secrets.GITHUB_TOKEN }}"
|
||||
commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."
|
||||
18
.github.backup/workflows/labeler.yml
Normal file
18
.github.backup/workflows/labeler.yml
Normal file
@@ -0,0 +1,18 @@
|
||||
name: Pull Request Labeler
|
||||
|
||||
on: pull_request_target
|
||||
|
||||
jobs:
|
||||
label:
|
||||
name: Label
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Label the PR
|
||||
uses: actions/labeler@v6
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
configuration-path: .github/labeler.yml
|
||||
sync-labels: true
|
||||
17
.github.backup/workflows/matchers/actionlint.json
Normal file
17
.github.backup/workflows/matchers/actionlint.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"problemMatcher": [
|
||||
{
|
||||
"owner": "actionlint",
|
||||
"pattern": [
|
||||
{
|
||||
"regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$",
|
||||
"file": 1,
|
||||
"line": 2,
|
||||
"column": 3,
|
||||
"message": 4,
|
||||
"code": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
16
.github.backup/workflows/matchers/mypy.json
Normal file
16
.github.backup/workflows/matchers/mypy.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"problemMatcher": [
|
||||
{
|
||||
"owner": "mypy",
|
||||
"pattern": [
|
||||
{
|
||||
"regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$",
|
||||
"file": 1,
|
||||
"line": 2,
|
||||
"severity": 3,
|
||||
"message": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
17
.github.backup/workflows/matchers/ruff.json
Normal file
17
.github.backup/workflows/matchers/ruff.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"problemMatcher": [
|
||||
{
|
||||
"owner": "ruff",
|
||||
"pattern": [
|
||||
{
|
||||
"regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
|
||||
"file": 1,
|
||||
"line": 2,
|
||||
"column": 3,
|
||||
"code": 4,
|
||||
"message": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
118
.github.backup/workflows/multi_node_test.yaml
Normal file
118
.github.backup/workflows/multi_node_test.yaml
Normal file
@@ -0,0 +1,118 @@
|
||||
name: 'e2e test / multi-dp'
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 */4 * * *"
|
||||
workflow_dispatch:
|
||||
|
||||
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
||||
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
||||
# It's used to activate ascend-toolkit environment variables.
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -el {0}
|
||||
|
||||
# only cancel in-progress runs of the same workflow
|
||||
# and ignore the lint / 8 cards test type
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
e2e:
|
||||
# This is a runner with no NPU for k8s controller
|
||||
runs-on: linux-aarch64-a3-0
|
||||
container:
|
||||
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
|
||||
env:
|
||||
KUBECONFIG: /tmp/kubeconfig
|
||||
KUBECTL: /root/.cache/.kube/kubectl
|
||||
NAMESPACE: vllm-project
|
||||
LEADER_POD: vllm-0
|
||||
steps:
|
||||
- name: Install system denpendencies
|
||||
run: |
|
||||
# configure apt and pip source
|
||||
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
||||
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
||||
|
||||
apt-get update -y && apt-get install -y git curl
|
||||
|
||||
TOKEN=`echo -n "x-access-token:${{ secrets.ADMIN_PTA }}" | base64`
|
||||
git config --global http.https://gh-proxy.test.osinfra.cn/.extraheader "AUTHORIZATION: basic $TOKEN"
|
||||
|
||||
- name: Install kubectl
|
||||
run: |
|
||||
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
|
||||
|
||||
# get kubeconfig from secret
|
||||
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare scripts
|
||||
run: |
|
||||
# prepare for lws entrypoint scripts
|
||||
install -D tests/e2e/multi_node/scripts/run.sh /root/.cache/tests/run.sh
|
||||
|
||||
- name: Launch cluster
|
||||
run: |
|
||||
kubectl apply -f tests/e2e/multi_node/scripts/lws.yaml
|
||||
|
||||
# Blocks the job until the leader pod ($LEADER_POD in $NAMESPACE, both set in
# the job-level env) reports ready, polling kubectl every 3 seconds.
# NOTE(review): the loop has no upper bound and no timeout-minutes is set on
# this job in this file, so a pod that never becomes ready keeps the runner
# busy until the platform-level job timeout — confirm that is acceptable.
- name: Waiting for pod ready
|
||||
run: |
|
||||
echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
|
||||
|
||||
while true; do
|
||||
# Query the `ready` flag(s) reported in the pod's containerStatuses.
# NOTE(review): the check below matches only the exact string "true"; if the
# pod has more than one container, kubectl presumably prints several values
# and this would never match — confirm the leader pod is single-container.
|
||||
READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}')
|
||||
|
||||
if [[ "$READY_STATUS" == "true" ]]; then
|
||||
echo "✅ Pod [$LEADER_POD] is Ready!"
|
||||
break
|
||||
else
|
||||
echo "Pod [$LEADER_POD] not ready, waiting..."
|
||||
sleep 3
|
||||
fi
|
||||
done
|
||||
|
||||
# Follows the leader pod's logs in the background while a second background
# loop polls the pod phase every 5 seconds. When the phase is neither Running
# nor Succeeded, the loop dumps diagnostics, stops the log stream and exits 1.
# NOTE(review): that `exit 1` runs inside a backgrounded loop (`done &`), so it
# ends only the background process; the foreground then runs
# `wait ... || true` and `kill ... || true`, which means this step appears to
# finish successfully even when the pod failed. Confirm whether the monitor's
# exit status was meant to fail the job.
- name: Stream logs and monitor pod health
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
echo "🚀 Start streaming logs for Pod [$LEADER_POD] ..."
|
||||
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" &
|
||||
# PID of the background log follower; the monitor loop kills it on failure
LOG_PID=$!
|
||||
|
||||
echo "Start monitoring Pod [$LEADER_POD] status ..."
|
||||
while true; do
|
||||
STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}')
|
||||
if [[ "$STATUS" != "Running" && "$STATUS" != "Succeeded" ]]; then
|
||||
echo "❌ Pod [$LEADER_POD] exited abnormally with status: $STATUS"
|
||||
# Best-effort diagnostics: failures of these commands are ignored
kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
|
||||
kubectl logs "$LEADER_POD" -n "$NAMESPACE" --previous --all-containers || true
|
||||
kill $LOG_PID || true
|
||||
exit 1
|
||||
fi
|
||||
sleep 5
|
||||
done &
|
||||
|
||||
MONITOR_PID=$!
|
||||
# Block until the log stream ends, then stop the monitor loop
wait $LOG_PID || true
|
||||
kill $MONITOR_PID || true
|
||||
|
||||
- name: Generate summary
|
||||
if: always()
|
||||
run: |
|
||||
if [ -f "/root/.cache/test_summary.md" ]; then
|
||||
cat /root/.cache/test_summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "No summary file found." >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
|
||||
- name: Post process
|
||||
if: always()
|
||||
run: |
|
||||
kubectl get pods -n $NAMESPACE
|
||||
kubectl delete -f tests/e2e/multi_node/scripts/lws.yaml
|
||||
206
.github.backup/workflows/nightly_benchmarks.yaml
Normal file
206
.github.backup/workflows/nightly_benchmarks.yaml
Normal file
@@ -0,0 +1,206 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
name: 'ascend test / performance'
|
||||
# This workflow runs nightly benchmarks for vllm-ascend.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8)
|
||||
- cron: "0 12 * * *"
|
||||
- cron: "0 19 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
# Allow manual triggering of the workflow
|
||||
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
||||
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
||||
# It's used to activate ascend-toolkit environment variables.
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -el {0}
|
||||
|
||||
# only 1 job can run on static-8-01-cards at a time
|
||||
concurrency:
|
||||
group: static-8-01-cards
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
test:
|
||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
|
||||
|
||||
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
|
||||
runs-on: 'linux-arm64-npu-static-8'
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- vllm_branch: v0.11.0
|
||||
vllm_ascend_branch: main
|
||||
vllm_use_v1: 1
|
||||
max-parallel: 1
|
||||
container:
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
|
||||
volumes:
|
||||
- /usr/local/dcmi:/usr/local/dcmi
|
||||
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
|
||||
- /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
|
||||
# Use self-host cache speed up pip and model download
|
||||
- /home/action/.cache:/github/home/.cache/
|
||||
options: >-
|
||||
--device /dev/davinci0
|
||||
--device /dev/davinci1
|
||||
--device /dev/davinci_manager
|
||||
--device /dev/devmm_svm
|
||||
--device /dev/hisi_hdc
|
||||
env:
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
|
||||
ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
|
||||
VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
|
||||
steps:
|
||||
- name: Check npu and CANN info
|
||||
run: |
|
||||
npu-smi info
|
||||
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
||||
|
||||
- name: Config mirrors
|
||||
run: |
|
||||
# keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
|
||||
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
||||
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update -y
|
||||
apt-get -y install git jq wget curl lsof gcc g++ cmake libnuma-dev
|
||||
|
||||
- name: Config git
|
||||
run: |
|
||||
git config --global --add safe.directory "$GITHUB_WORKSPACE"
|
||||
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
|
||||
|
||||
- name: Checkout vllm-project/vllm-ascend repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
path: ./vllm-empty
|
||||
ref: ${{ matrix.vllm_branch }}
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: ./vllm-empty
|
||||
run: |
|
||||
VLLM_TARGET_DEVICE=empty pip install -e .
|
||||
|
||||
- name: Install vllm-project/vllm-ascend
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||
run: |
|
||||
pip install -e .
|
||||
pip install -r benchmarks/requirements-bench.txt
|
||||
|
||||
- name: Run current commit benchmarks
|
||||
if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
# Sometimes we only want to run benchmarks on the current commit
|
||||
# This is useful for debugging or a release benchmark
|
||||
bash benchmarks/scripts/run-performance-benchmarks.sh
|
||||
# Convert the benchmark results to markdown format
|
||||
python3 benchmarks/scripts/convert_json_to_markdown.py
|
||||
|
||||
- name: Generate step summary
|
||||
if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
cat ./benchmarks/results/benchmark_results.md >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
- name: Upload benchmark artifacts
|
||||
if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: "benchmark-performance-${{ matrix.vllm_branch }}-${{ matrix.vllm_ascend_branch }}-report"
|
||||
path: ./benchmarks/results/benchmark_results.md
|
||||
if-no-files-found: warn
|
||||
retention-days: 90
|
||||
overwrite: true
|
||||
|
||||
- name: Install elastic_tool
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
pip install escli-tool==0.2.3
|
||||
|
||||
- name: Collect pr info from vllm-project/vllm-ascend
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
# Only get the pull requests which may influence performance
|
||||
git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' ':!benchmarks/*' > commit_log.txt
|
||||
escli check commit_log.txt
|
||||
|
||||
- name: Prepare benchmark script in advance
|
||||
if: github.event_name != 'pull_request'
|
||||
# This is for the benchmark iteration, which will change the benchmark scripts while checking out each commit.
|
||||
# We need to ensure the benchmark scripts are always available.
|
||||
run: |
|
||||
# Prepare the benchmark script in advance
|
||||
mkdir -p /github/home/benchmarks
|
||||
cp -r benchmarks/* /github/home/benchmarks/
|
||||
|
||||
# Replays the benchmark suite over every commit listed in commit_log.txt
# (one "<hash> <subject>" entry per line, written by the "Collect pr info"
# step). For each entry: check out the commit, reinstall vllm-ascend, run the
# benchmark scripts previously copied to /github/home/benchmarks, and upload
# the results (or an error message) with escli. Skipped for pull_request events.
- name: Run benchmark iteration
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
while IFS= read -r line || [[ -n "$line" ]]; do
|
||||
# Split "<hash> <subject>" at the first space
commit_id=${line%% *}
|
||||
commit_title=${line#* }
|
||||
|
||||
git checkout $commit_id
|
||||
# Look up the committer date of the commit under test by its explicit hash
commit_time=$(git show -s --format=%cd "$commit_id" --date=iso-strict)
|
||||
# Keep only "YYYY-MM-DDTHH:MM:SS" (first 19 chars), dropping the UTC offset
commit_time_no_tz=${commit_time::19}
|
||||
pip install -e .
|
||||
|
||||
echo "------------------------"
|
||||
echo "commit_id: $commit_id"
|
||||
echo "commit_title: $commit_title"
|
||||
echo "commit_time: $commit_time_no_tz"
|
||||
echo "vllm branch: ${{ matrix.vllm_branch }}"
|
||||
echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
|
||||
echo "------------------------"
|
||||
|
||||
# Run from the stable copy of the scripts, not the checked-out tree
cd /github/home
|
||||
ERROR_MSG=""
|
||||
if ! bash benchmarks/scripts/run-performance-benchmarks.sh; then
|
||||
ERROR_MSG="Benchmark failed to run"
|
||||
fi
|
||||
# send the result to es
|
||||
escli add --vllm_branch ${{ matrix.vllm_branch }} \
|
||||
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
|
||||
--commit_id $commit_id \
|
||||
--commit_title "$commit_title" \
|
||||
--created_at "$commit_time_no_tz" \
|
||||
--res_dir ./benchmarks/results \
|
||||
--error "$ERROR_MSG" \
|
||||
--extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
|
||||
# Clear results so the next commit starts from an empty directory
rm -rf ./benchmarks/results
|
||||
cd -
|
||||
done < commit_log.txt
|
||||
43
.github.backup/workflows/pre-commit.yml
Normal file
43
.github.backup/workflows/pre-commit.yml
Normal file
@@ -0,0 +1,43 @@
|
||||
name: pre-commit

# Reusable workflow: the caller passes the vLLM ref to install.
on:
  workflow_call:
    inputs:
      vllm:
        required: true
        type: string

permissions:
  contents: read

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c  # v6.0.0
        with:
          python-version: "3.11"
      # Register problem matchers for actionlint and mypy output.
      - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
      - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: ./vllm-empty
          ref: ${{ inputs.vllm }}
      - name: Install vllm
        working-directory: vllm-empty
        run: |
          pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
          VLLM_TARGET_DEVICE=empty pip install .
      - name: Install vllm-ascend dev
        run: |
          pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd  # v3.0.1
        env:
          # Exclude SC2046, SC2006, SC2086 for actionlint
          SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086"
        with:
          extra_args: --all-files --hook-stage manual
75
.github.backup/workflows/release_code.yml
Normal file
75
.github.backup/workflows/release_code.yml
Normal file
@@ -0,0 +1,75 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: build / sdist

# Builds the source distribution on relevant pull requests and on version
# tags; only tag pushes upload the result to PyPI.
on:
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    paths:
      - '.github/workflows/release_code.yml'
      - 'vllm_ascend/**'
      - 'setup.py'
      - 'pyproject.toml'
      - 'requirements.txt'
      - 'cmake/**'
      - 'CMakeLists.txt'
      - 'csrc/**'
  push:
    tags:
      - 'v*'

jobs:
  build:
    name: release code
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493  # v4.2.2

      - name: Print
        run: |
          lscpu

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c  # v6.0.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python3 -m pip install twine setuptools_scm

      - name: Generate tar.gz
        run: |
          python3 setup.py sdist
          ls dist

      - name: Archive tar.gz
        uses: actions/upload-artifact@v4
        with:
          name: vllm-ascend-src
          path: dist/*

      - name: Release
        if: startsWith(github.ref, 'refs/tags/')
        # Hand the credentials to twine through its environment variables so
        # the token is not expanded into the script text or the command line.
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python3 -m twine upload --non-interactive dist/*
125
.github.backup/workflows/release_whl.yml
Normal file
125
.github.backup/workflows/release_whl.yml
Normal file
@@ -0,0 +1,125 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: build / wheel

# Builds wheels nightly, on relevant pull requests and on version tags; only
# tag pushes upload the result to PyPI.
on:
  schedule:
    # Runs at 23:00 UTC (7:00 AM Beijing) every day
    - cron: '0 23 * * *'
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    paths:
      - '.github/workflows/release_whl.yml'
      - '.github/Dockerfile.buildwheel'
      - 'vllm_ascend/**'
      - 'setup.py'
      - 'pyproject.toml'
      - 'requirements.txt'
      - 'cmake/**'
      - 'CMakeLists.txt'
      - 'csrc/**'
  push:
    tags:
      - 'v*'

jobs:
  build:
    name: build and release wheel
    strategy:
      matrix:
        os: [ubuntu-24.04, ubuntu-24.04-arm]
        # PR only trigger latest version
        python-version: ${{ fromJSON(
          (github.event_name == 'pull_request' && '["3.11"]') ||
          '["3.9", "3.10", "3.11"]'
          ) }}
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493  # v4.2.2

      - name: Print
        run: |
          lscpu

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be  # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      - name: Build wheel
        run: |
          ls
          # Build inside the image, then copy dist/ out through a bind mount
          # as the current user so the files stay writable on the runner.
          docker build -f ./.github/Dockerfile.buildwheel \
            --build-arg PY_VERSION=${{ matrix.python-version }} \
            -t wheel:v1 .
          docker run --rm \
            -u $(id -u):$(id -g) \
            -v $(pwd):/outpwd \
            wheel:v1 \
            bash -c "cp -r /workspace/vllm-ascend/dist /outpwd"
          ls dist

      - name: Set up Python ${{ matrix.python-version }}
        if: startsWith(github.ref, 'refs/tags/')
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c  # v6.0.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Repair wheels with auditwheel
        run: |
          python3 -m pip install auditwheel
          python3 -m pip install patchelf
          mkdir -p dist/repaired
          # The excluded libraries are left out of the repaired wheel.
          for whl in dist/*.whl; do
            auditwheel repair "$whl" -w dist/repaired/ \
              --exclude libplatform.so \
              --exclude libregister.so \
              --exclude libge_common_base.so \
              --exclude libc10.so \
              --exclude libc_sec.so \
              --exclude "libascend*.so" \
              --exclude "libtorch*.so" \
              --exclude "liberror_manager.so"
          done
          # Replace the original wheels with the repaired ones.
          rm -f dist/*.whl
          mv dist/repaired/*.whl dist/
          rmdir dist/repaired
          ls dist

      - name: Verify automatic platform tags
        run: |
          cd dist
          for wheel in *.whl; do
            echo "verification file: $wheel"
            auditwheel show "$wheel"
          done

      - name: Archive wheel
        uses: actions/upload-artifact@v4
        with:
          name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
          path: dist/*

      - name: Release
        if: startsWith(github.ref, 'refs/tags/')
        # Hand the credentials to twine through its environment variables so
        # the token is not expanded into the script text or the command line.
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python3 -m pip install twine
          python3 -m twine upload --verbose --non-interactive dist/*
26
.github.backup/workflows/reminder_comment.yml
Normal file
26
.github.backup/workflows/reminder_comment.yml
Normal file
@@ -0,0 +1,26 @@
|
||||
name: PR Reminder Comment Bot

# Posts a reminder comment when a pull request is opened.
on:
  pull_request_target:
    types: [opened]

permissions:
  pull-requests: write

jobs:
  pr_reminder:
    runs-on: ubuntu-latest
    steps:
      - name: Remind to run full CI on PR
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd  # v8.0.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          script: |
            github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: '👋 Hi! Thank you for contributing to the vLLM Ascend project. The following points will speed up your PR merge:\n\n' +
                '- A PR should do only one thing, smaller PRs enable faster reviews.\n' +
                '- Every PR should include unit tests and end-to-end tests to ensure it works and is not broken by other future PRs.\n' +
                '- Write the commit message by fulfilling the PR description to help reviewer and future developers understand.\n\n' +
                'If CI fails, you can run linting and testing checks locally according [Contributing](https://vllm-ascend.readthedocs.io/zh-cn/latest/developer_guide/contribution/index.html) and [Testing](https://vllm-ascend.readthedocs.io/zh-cn/latest/developer_guide/contribution/testing.html).'
            })
100
.github.backup/workflows/vllm_ascend_dist.yaml
Normal file
100
.github.backup/workflows/vllm_ascend_dist.yaml
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: 'e2e test / a3-test'

on:
  workflow_call:

  pull_request:
    types: [ labeled ]

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# only cancel in-progress runs of the same workflow
# and ignore the lint / 8 cards test type
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e:
    # Run only when the pull request carries both the 'dist-test' and
    # 'ready-for-test' labels, or when the event is workflow_dispatch.
    if: ${{ contains(github.event.pull_request.labels.*.name, 'dist-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'workflow_dispatch' }}
    strategy:
      matrix:
        os: [linux-aarch64-a3-8]
        vllm_version: [v0.11.0]
    name: vLLM Ascend test
    runs-on: ${{ matrix.os }}
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
      env:
        DEBIAN_FRONTEND: noninteractive
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
          apt-get update -y
          apt install git -y
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        run: |
          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
          # Derive the toolkit architecture directory from the runner instead of
          # hard-coding x86_64: this job runs on an aarch64 runner, and the
          # "Check npu and CANN info" step resolves the same directory this way.
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/devlib
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test for V1 Engine
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
          # TODO: enable more tests
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
87
.github.backup/workflows/vllm_ascend_doctest.yaml
Normal file
87
.github.backup/workflows/vllm_ascend_doctest.yaml
Normal file
@@ -0,0 +1,87 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: 'ascend test / doctest'

on:
  workflow_dispatch:
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    paths:
      # If we are changing the doctest we should do a PR test
      - '.github/workflows/vllm_ascend_doctest.yaml'
      - 'tests/e2e/doctests/**'
      - 'tests/e2e/common.sh'
      - 'tests/e2e/run_doctests.sh'
  schedule:
    # Runs every 12 hours
    - cron: '0 */12 * * *'

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

jobs:
  test:
    strategy:
      # Each version should be tested
      fail-fast: false
      matrix:
        # Each entry is used as the tag of the vllm-ascend container image below.
        vllm_version: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
    name: vLLM Ascend test
    runs-on: linux-aarch64-a2-1
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_version }}
    steps:
      - name: Check NPU/CANN and git info
        run: |
          echo "====> Print NPU/CANN info"
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

          echo "====> Print vllm-ascend git info"
          cd /vllm-workspace/vllm-ascend
          git --no-pager log -1 || true
          echo "====> Print vllm git info"
          cd /vllm-workspace/vllm
          git --no-pager log -1 || true

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Run vllm-ascend/tests/e2e/run_doctests.sh
        run: |
          # PWD: /__w/vllm-ascend/vllm-ascend
          # Make sure e2e tests are latest
          echo "Replacing /vllm-workspace/vllm-ascend/tests/e2e ..."
          rm -rf /vllm-workspace/vllm-ascend/tests/e2e
          mkdir -p /vllm-workspace/vllm-ascend/tests
          # Overwrite e2e and examples
          cp -r tests/e2e /vllm-workspace/vllm-ascend/tests/
          cp -r examples /vllm-workspace/vllm-ascend/

          # Simulate container to enter directory
          cd /workspace

          # Run real test
          echo "Test:"
          /vllm-workspace/vllm-ascend/tests/e2e/run_doctests.sh
149
.github.backup/workflows/vllm_ascend_test.yaml
Normal file
149
.github.backup/workflows/vllm_ascend_test.yaml
Normal file
@@ -0,0 +1,149 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: 'ascend test'

on:
  push:
    branches:
      - 'main'
  pull_request:
    branches:
      - 'main'
      - '*-dev'

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  lint:
    uses: ./.github/workflows/pre-commit.yml
    with:
      vllm: v0.11.0

  # Computes which path groups changed; later jobs gate on these outputs.
  changes:
    runs-on: ubuntu-latest
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            e2e_tracker:
              - '.github/workflows/vllm_ascend_test.yaml'
              - 'vllm_ascend/**'
              - 'csrc/**'
              - 'cmake/**'
              - 'tests/e2e/**'
              - 'CMakeLists.txt'
              - 'setup.py'
              - 'requirements.txt'
              - 'requirements-dev.txt'
              - 'requirements-lint.txt'
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'

  ut:
    needs: [lint, changes]
    name: unit test
    # only trigger unit test after lint passed and the change is e2e and ut related.
    if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
    runs-on: ubuntu-22.04-arm
    container:
      image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    strategy:
      matrix:
        vllm_version: [v0.11.0]
    steps:
      - name: Install packages
        run: |
          apt-get update -y
          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty python3 -m pip install .
          python3 -m pip uninstall -y triton

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install vllm-project/vllm-ascend
        run: |
          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
          python3 -m pip install -r requirements-dev.txt
          python3 -m pip install -v .

      - name: Run unit test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
        run: |
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
          pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
            --ignore tests/ut/attention/test_attention_v1.py
      - name: Upload coverage to Codecov
        # only upload coverage when commits merged
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: codecov/codecov-action@v5
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          flags: unittests
          name: vllm-ascend
          verbose: true

  e2e-light:
    name: e2e-light
    strategy:
      matrix:
        vllm_version: [v0.11.0]
    # Note (yikun): If CI resource are limited we can split job into two chain jobs
    needs: [lint, changes]
    # only trigger e2e test after lint passed and the change is e2e related with pull request.
    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: ${{ matrix.vllm_version }}
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      type: light
117
.github.backup/workflows/vllm_ascend_test_310p.yaml
Normal file
117
.github.backup/workflows/vllm_ascend_test_310p.yaml
Normal file
@@ -0,0 +1,117 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: 'e2e test / 310p-test'

on:
  push:
    tags:
      - 'v*'
  schedule:
    # Runs every 6 hours
    - cron: '0 */6 * * *'
  pull_request:
    types: [ labeled ]

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e:
    # Runs when the pull request carries both the 'e2e-310p-test' and
    # 'ready-for-test' labels, or on schedule / push events.
    if: >-
      ${{
      (contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) &&
      contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
      github.event_name == 'schedule' || github.event_name == 'push'
      }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
        vllm_version: [v0.11.0]
    name: 310p e2e test
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        run: |
          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
          # Derive the toolkit architecture directory from the runner instead of
          # hard-coding x86_64: these jobs run on aarch64 runners, and the
          # "Check npu and CANN info" step resolves the same directory this way.
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/devlib
          export SOC_VERSION=ASCEND310P3
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
        run: |
          # The -1 runner runs the offline inference test; the other runner
          # runs the parallel variant.
          if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
            pytest -sv tests/e2e/310p/test_offline_inference_310p.py
          else
            pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
          fi
80
.github.backup/workflows/vllm_ascend_test_full.yaml
Normal file
80
.github.backup/workflows/vllm_ascend_test_full.yaml
Normal file
@@ -0,0 +1,80 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
name: 'ascend test / full'

on:
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    types: [ labeled, synchronize ]

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Runs only for pull requests labeled both 'ready' and 'ready-for-test';
  # its outputs tell the e2e job whether relevant paths changed.
  changes:
    runs-on: ubuntu-latest
    if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            e2e_tracker:
              - '.github/workflows/vllm_ascend_test.yaml'
              - '.github/workflows/vllm_ascend_test_full.yaml'
              - '.github/workflows/_e2e_test.yaml'
              - 'vllm_ascend/**'
              - 'csrc/**'
              - 'cmake/**'
              - 'tests/e2e/**'
              - 'CMakeLists.txt'
              - 'setup.py'
              - 'requirements.txt'
              - 'requirements-dev.txt'
              - 'requirements-lint.txt'
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'

  e2e-test:
    name: e2e-full
    strategy:
      matrix:
        vllm_version: [v0.11.0]
    needs: [changes]
    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: ${{ matrix.vllm_version }}
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      type: full
@@ -0,0 +1,45 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
name: 'ascend test / vllm main'

on:
  # Run 1-card and 2-cards e2e tests per 2h
  schedule:
    - cron: '0 */2 * * *'
  workflow_dispatch:

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Calls the shared e2e workflow with the vLLM ref set to main.
  e2e-test:
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: main
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      type: full
177
.github.backup/workflows/vllm_ascend_test_models.yaml
Normal file
177
.github.backup/workflows/vllm_ascend_test_models.yaml
Normal file
@@ -0,0 +1,177 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
# This test will be triggered:
|
||||
# 1. schedule
|
||||
# 2. pull_request that changes the related files
|
||||
# 3. workflow_dispatch with the vllm-ascend-version input
|
||||
|
||||
name: "ascend test / models"

on:
  schedule:
    # Timer trigger: minute 0 of every sixth hour.
    - cron: "0 */6 * * *"
  pull_request:
    # Only PRs targeting main or a *-dev branch that touch this workflow
    # file or the lm-eval correctness test.
    branches: ["main", "*-dev"]
    paths:
      - ".github/workflows/vllm_ascend_test_models.yaml"
      - "tests/e2e/models/test_lm_eval_correctness.py"
  workflow_dispatch:
    inputs:
      vllm-ascend-version:
        description: "vllm-ascend:"
        type: choice
        required: true
        default: main
        # Selectable vllm-ascend versions.
        options:
          - latest
          - main

# Plain bash steps skip ~/.profile and ~/.bashrc, so every `run:` step is
# started as a login shell ("bash -el {0}"). This is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# A newer run of this workflow on the same ref cancels the run still in
# progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
jobs:
|
||||
run:
  # One call of the reusable accuracy workflow per matrix entry. `runner` is
  # the suffix appended to "linux-aarch64-" to form the runner label.
  strategy:
    # A failing model does not cancel the remaining matrix entries.
    fail-fast: false
    matrix:
      include:
        - runner: a2-1
          model_name: Qwen3-8B
        - runner: a2-1
          model_name: Qwen2.5-VL-7B-Instruct
        - runner: a2-1
          model_name: Qwen2-Audio-7B-Instruct
        - runner: a2-2
          model_name: Qwen3-30B-A3B
        - runner: a2-2
          model_name: Qwen3-VL-30B-A3B-Instruct
        - runner: a2-2
          model_name: DeepSeek-V2-Lite
  uses: ./.github/workflows/_accuracy_test.yaml
  with:
    model_name: ${{ matrix.model_name }}
    runner: linux-aarch64-${{ matrix.runner }}
    vllm: v0.11.0
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
    # Upload is requested only for manual runs that selected "latest".
    upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
|
||||
|
||||
create_pr:
  # Runs after the `run` job, and only for manual (workflow_dispatch) runs
  # that selected the "latest" vllm-ascend version.
  if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
  needs: [run]
  runs-on: ubuntu-latest
  env:
    # Repository that the later steps add as the "upstream" remote.
    UPSTREAM_REPO: vllm-project/vllm-ascend
  steps:
|
||||
- name: Checkout repository
  # Check out the main branch of vllm-ascend-ci/vllm-ascend, authenticating
  # with the PAT_TOKEN secret.
  uses: actions/checkout@v4
  with:
    ref: main
    repository: vllm-ascend-ci/vllm-ascend
    token: ${{ secrets.PAT_TOKEN }}
|
||||
|
||||
- name: Add upstream remote
  # UPSTREAM_REPO comes from the job-level env and is read here as an
  # ordinary shell variable.
  run: |
    git remote add upstream "https://github.com/${UPSTREAM_REPO}.git"
    git fetch upstream
    git remote -v
|
||||
|
||||
- name: Set Git user info dynamically
  # Commit as the account that triggered the run. GITHUB_ACTOR is the
  # runner-provided environment variable holding the same value as
  # `github.actor`.
  run: |
    git config user.email "${GITHUB_ACTOR}@users.noreply.github.com"
    git config user.name "${GITHUB_ACTOR}"
|
||||
|
||||
- name: Create or switch to branch
  # Build a timestamped branch name, export it to later steps through
  # GITHUB_ENV, and (re)create the branch from upstream/main.
  run: |
    BRANCH_NAME="auto-pr/accuracy-report-$(date +%Y%m%d%H%M%S)"
    echo "BRANCH_NAME=${BRANCH_NAME}" >> "$GITHUB_ENV"
    git checkout -B "${BRANCH_NAME}" upstream/main
|
||||
|
||||
- name: Download only current run reports
  # Fetch the artifacts of this workflow run whose names match "report-*"
  # into the accuracy_report docs directory.
  uses: actions/download-artifact@v5
  with:
    run-id: ${{ github.run_id }}
    github-token: ${{ secrets.GITHUB_TOKEN }}
    pattern: "report-*"
    path: ./docs/source/developer_guide/evaluation/accuracy_report
|
||||
|
||||
- name: Delete old report
  run: |
    REPORT_DIR=./docs/source/developer_guide/evaluation/accuracy_report
    # 1) Remove every top-level Markdown file except index.md.
    find "$REPORT_DIR" -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
    # 2) Move Markdown files found in sub-directories up to the top level.
    find "$REPORT_DIR" -mindepth 2 -type f -name '*.md' -exec mv -f {} "$REPORT_DIR" \;
    # 3) Remove the sub-directories left empty by the move.
    find "$REPORT_DIR" -mindepth 1 -type d -empty -delete
|
||||
|
||||
- name: Update accuracy_report/index.md
  # Regenerate index.md as a toctree listing every report in the directory
  # (all *.md files except index.md itself).
  run: |
    REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
    {
      printf '%s\n' \
        "# Accuracy Report" \
        "" \
        ":::{toctree}" \
        ":caption: Accuracy Report" \
        ":maxdepth: 1"

      for md_file in "$REPORT_DIR"/*.md; do
        entry="$(basename "$md_file" .md)"
        # The index must not list itself.
        if [ "$entry" = "index" ]; then
          continue
        fi
        echo "$entry"
      done
      echo ":::"
    } > "$REPORT_DIR/index.md"
|
||||
|
||||
- name: push accuracy report
  # Stage, commit (signed-off) and force-push the regenerated reports to the
  # branch created earlier in this job. BRANCH_NAME was exported through
  # GITHUB_ENV by the "Create or switch to branch" step.
  env:
    GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
  run: |
    REPORT_DIR="docs/source/developer_guide/evaluation/accuracy_report"
    # The pathspec is quoted so that git, not the shell, expands it.
    # Together with -A this also stages reports that the "Delete old report"
    # step removed; a shell-expanded glob only lists files that still exist
    # and would leave those deletions out of the commit.
    git add -A -- "${REPORT_DIR}/*.md"
    git commit -s -m "[Doc] Update accuracy reports for ${BRANCH_NAME}"
    git push -f origin "${BRANCH_NAME}"
|
||||
|
||||
- name: Create PR in upstream via API
  # Open a pull request against vllm-project/vllm-ascend main from the
  # branch pushed to the vllm-ascend-ci fork, authenticating with PAT_TOKEN.
  uses: actions/github-script@v8
  with:
    github-token: ${{ secrets.PAT_TOKEN }}
    script: |
      // `head` uses the cross-repository form "<fork owner>:<branch>".
      const pr = await github.rest.pulls.create({
        owner: 'vllm-project',
        repo: 'vllm-ascend',
        head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
        base: 'main',
        title: `[Doc] Update accuracy reports for ${{ env.BRANCH_NAME }}`,
        body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models

        - [Workflow run][1]

        [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
      });
      core.info(`Created PR #${pr.data.number}`);
|
||||
112
.github.backup/workflows/vllm_ascend_test_pd.yaml
Normal file
112
.github.backup/workflows/vllm_ascend_test_pd.yaml
Normal file
@@ -0,0 +1,112 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
name: "e2e test / pd-disaggregation"

on:
  schedule:
    # Daily at 23:00 UTC (07:00 Beijing time).
    - cron: "0 23 * * *"
  pull_request:
    types: [labeled]

# Plain bash steps skip ~/.profile and ~/.bashrc, so every `run:` step is
# started as a login shell ("bash -el {0}"). This is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Only one job may run on static-8-01-cards at a time: all runs share one
# fixed group, and a run in progress is never cancelled by a newer one.
concurrency:
  cancel-in-progress: false
  group: static-8-01-cards
|
||||
|
||||
jobs:
|
||||
prefilling-decoding-disaggregation:
  # Runs for the scheduled trigger, or for a pull request that carries both
  # the 'pd-test' and 'ready-for-test' labels.
  if: ${{ (contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) || github.event_name == 'schedule' }}
  strategy:
    matrix:
      # vLLM refs to check out and test against.
      vllm_version:
        - main
        - v0.9.1
  name: vLLM Ascend prefilling decoding disaggregation test
  runs-on: linux-arm64-npu-static-8

  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
    volumes:
      - /usr/local/dcmi:/usr/local/dcmi
      - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
      - /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
      # Use self-host cache speed up pip and model download
      - /home/action/.cache:/github/home/.cache/
    # Pass the NPU device nodes through to the container.
    options: >-
      --device /dev/davinci0
      --device /dev/davinci1
      --device /dev/davinci_manager
      --device /dev/devmm_svm
      --device /dev/hisi_hdc
    # NOTE(review): nesting of `env` under `container` is assumed; confirm
    # against the original file.
    env:
      VLLM_USE_MODELSCOPE: True
  steps:
    - name: Check npu and CANN info
      run: |
        npu-smi info
        cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

    - name: Config mirrors
      run: |
        # keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
        sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
        pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
        apt-get update -y
        apt-get install -y git
        git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

    - name: Checkout vllm-project/vllm-ascend repo
      uses: actions/checkout@v4

    - name: Install system dependencies
      run: |
        # Word splitting of the package list is intentional.
        apt-get -y install $(cat packages.txt)
        apt-get -y install gcc g++ cmake libnuma-dev

    - name: Checkout vllm-project/vllm repo
      uses: actions/checkout@v4
      with:
        repository: vllm-project/vllm
        ref: ${{ matrix.vllm_version }}
        path: ./vllm-empty

    - name: Install vllm-project/vllm from source
      working-directory: ./vllm-empty
      run: |
        VLLM_TARGET_DEVICE=empty pip install -e .

    - name: Install vllm-project/vllm-ascend
      env:
        PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
      run: |
        pip install -r requirements-dev.txt
        pip install -v -e .

    - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
      run: |
        # `safe.directory` is the config key and the path is its value; they
        # must be separate arguments for git to register the checkout as safe.
        git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
        bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
|
||||
Reference in New Issue
Block a user