Files
xc-llm-ascend/tests/e2e/nightly/multi_node/scripts/run.sh
zhangxinyuehfad 1e4017e3fa [CI] support nightly ci for per pr by labels (#6483)
### What this PR does / why we need it?

This PR refactors the nightly CI workflows (A2 and A3) to support
running tests against a specific PR's code, in addition to the existing
scheduled/dispatch runs using pre-built images.

#### Motivation:
Previously, nightly tests could only be triggered by schedule or
workflow_dispatch, always using the pre-built nightly image. This change
allows developers to trigger nightly tests against their own PR's source
code, enabling early validation without waiting for a nightly build.

#### Changes
Trigger logic (parse-trigger job)

A new parse-trigger job is introduced in both
schedule_nightly_test_a2.yaml and schedule_nightly_test_a3.yaml to
centralize trigger evaluation:

`schedule / workflow_dispatch`: runs all tests with the pre-built image
(existing behavior preserved)
`pull_request (labeled + synchronize)`: runs only when:The PR has the
nightly-test label, and /nightly [test-names] comment exists (latest one
wins)

1. /nightly or /nightly all — runs all tests
2. /nightly test1 test2 — runs only named tests (comma-wrapped for exact
matching)

#### How to trigger
1. Add the nightly-test label to your PR
2. Comment /nightly (all tests) or /nightly test1 test2 (specific tests)
4. Re-triggering: add another /nightly comment and push a new commit
(synchronize event)

### Does this PR introduce _any_ user-facing change?
None

### How was this patch tested?

- vLLM version: v0.14.1
- vLLM main:
dc917cceb8

---------

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
2026-03-05 16:46:37 +08:00

244 lines
7.5 KiB
Bash

#!/bin/bash
set -euo pipefail
# Color definitions
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
RED="\033[0;31m"
NC="\033[0m" # No Color
# Configuration
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
# cann and atb environment setup
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/cann-8.5.1/share/info/ascendnpu-ir/bin/set_env.sh
set +eu
source /usr/local/Ascend/nnal/atb/set_env.sh
set -eu
# Home path for aisbench
export BENCHMARK_HOME=${WORKSPACE}/vllm-ascend/benchmark
# Logging configurations
export VLLM_LOGGING_LEVEL="INFO"
# Reduce glog verbosity for mooncake
export GLOG_minloglevel=1
# Set transformers to offline mode to avoid downloading models during tests
export HF_HUB_OFFLINE="1"
# Default is 600s
export VLLM_ENGINE_READY_TIMEOUT_S=1800
# Function to print section headers
print_section() {
echo -e "\n${BLUE}=== $1 ===${NC}"
}
print_failure() {
echo -e "${RED}${FAIL_TAG:-test_failed} ✗ ERROR: $1${NC}"
exit 1
}
# Function to print success messages
print_success() {
echo -e "${GREEN}$1${NC}"
}
# Function to print error messages and exit
print_error() {
echo -e "${RED}✗ ERROR: $1${NC}"
exit 1
}
show_vllm_info() {
cd "$WORKSPACE"
echo "Installed vLLM-related Python packages:"
pip list | grep vllm || echo "No vllm packages found."
echo ""
echo "============================"
echo "vLLM Git information"
echo "============================"
cd vllm
if [ -d .git ]; then
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
echo "Commit hash: $(git rev-parse HEAD)"
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
echo "Message: $(git log -1 --pretty=format:'%s')"
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
echo "Remote: $(git remote -v | head -n1)"
echo ""
else
echo "No .git directory found in vllm"
fi
cd ..
echo ""
echo "============================"
echo "vLLM-Ascend Git information"
echo "============================"
cd vllm-ascend
if [ -d .git ]; then
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
echo "Commit hash: $(git rev-parse HEAD)"
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
echo "Message: $(git log -1 --pretty=format:'%s')"
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
echo "Remote: $(git remote -v | head -n1)"
echo ""
else
echo "No .git directory found in vllm-ascend"
fi
cd ..
}
check_npu_info() {
echo "====> Check NPU info"
npu-smi info
cat "/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/ascend_toolkit_install.info"
}
check_and_config() {
echo "====> Configure mirrors and git proxy"
git config --global url."https://ghfast.top/https://github.com/".insteadOf "https://github.com/"
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
}
install_extra_components() {
echo "====> Installing extra components for DeepSeek-v3.2-exp-bf16"
if ! wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run; then
echo "Failed to download CANN-custom_ops-sfa-linux.aarch64.run"
return 1
fi
chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run
./CANN-custom_ops-sfa-linux.aarch64.run --quiet
if ! wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl; then
echo "Failed to download custom_ops wheel"
return 1
fi
pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl
export ASCEND_CUSTOM_OPP_PATH="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize${ASCEND_CUSTOM_OPP_PATH:+:${ASCEND_CUSTOM_OPP_PATH}}"
export LD_LIBRARY_PATH="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
source /usr/local/Ascend/ascend-toolkit/set_env.sh
rm -f CANN-custom_ops-sfa-linux.aarch64.run \
custom_ops-1.0-cp311-cp311-linux_aarch64.whl
echo "====> Extra components installation completed"
}
checkout_src() {
echo "====> Checkout source code"
mkdir -p "$WORKSPACE"
cd "$WORKSPACE"
pip uninstall -y vllm vllm-ascend || true
rm -rf "$WORKSPACE/vllm" "$WORKSPACE/vllm-ascend"
if [ ! -d "$WORKSPACE/vllm-ascend" ]; then
echo "Cloning vllm-ascend from $VLLM_ASCEND_REMOTE_URL"
git clone --depth 1 "$VLLM_ASCEND_REMOTE_URL" "$WORKSPACE/vllm-ascend"
cd "$WORKSPACE/vllm-ascend"
PR_REF=$(git ls-remote origin 'refs/pull/*/head' | grep "^${VLLM_ASCEND_REF}" | awk '{print $2}' | head -1)
if [ -n "$PR_REF" ]; then
git fetch --depth 1 origin "$PR_REF"
git checkout FETCH_HEAD
else
git fetch origin '+refs/pull/*/head:refs/remotes/pull/*' 2>/dev/null || true
git checkout "$VLLM_ASCEND_REF"
fi
fi
if [ ! -d "$WORKSPACE/vllm" ]; then
echo "Cloning vllm version/ref: $VLLM_VERSION"
git clone --depth 1 --branch "$VLLM_VERSION" https://github.com/vllm-project/vllm.git "$WORKSPACE/vllm"
fi
}
install_vllm() {
echo "====> Install vllm and vllm-ascend"
VLLM_TARGET_DEVICE=empty pip install -e "$WORKSPACE/vllm"
pip install -r "$WORKSPACE/vllm-ascend/requirements-dev.txt"
pip install -e "$WORKSPACE/vllm-ascend"
}
install_aisbench() {
echo "====> Install AISBench benchmark"
export AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
: "${AIS_BENCH_TAG:=v3.0-20250930-master}"
BENCH_DIR="$WORKSPACE/vllm-ascend/benchmark"
if [ -d "$BENCH_DIR" ]; then
echo "Removing existing benchmark directory..."
rm -rf "$BENCH_DIR"
fi
git clone -b "${AIS_BENCH_TAG}" --depth 1 \
"${AIS_BENCH_URL}" "${BENCH_DIR}"
cd "$BENCH_DIR"
pip install -e . \
-r requirements/api.txt \
-r requirements/extra.txt
python3 -m pip cache purge || echo "WARNING: pip cache purge failed, but proceeding..."
}
show_triton_ascend_info() {
echo "====> Check triton ascend info"
clang -v
which bishengir-compile
pip show triton-ascend
}
kill_npu_processes() {
pgrep python3 | xargs -r kill -9
pgrep VLLM | xargs -r kill -9
sleep 4
}
run_tests_with_log() {
set +e
kill_npu_processes
pytest -sv --show-capture=no tests/e2e/nightly/multi_node/scripts/test_multi_node.py
ret=$?
set -e
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
if [ $ret -eq 0 ]; then
print_success "All tests passed!"
else
print_failure "Some tests failed, please check the error stack above for details. \
If this is insufficient to pinpoint the error, please download and review the logs of all other nodes from the job's summary."
fi
fi
}
main() {
check_npu_info
check_and_config
if [[ "$IS_PR_TEST" == "true" ]]; then
checkout_src
install_vllm
install_aisbench
fi
show_vllm_info
show_triton_ascend_info
if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then
install_extra_components
fi
cd "$WORKSPACE/vllm-ascend"
run_tests_with_log
}
main "$@"