Files
xc-llm-ascend/tests/e2e/nightly/multi_node/scripts/run.sh
zhangxinyuehfad 886756aea0 [Bugfix][CI] Fix aisbench installation to avoid Gitee authentication (#7536)
### What this PR does / why we need it?
- Pass GITEE_USERNAME (var) and GITEE_TOKEN (secret) as Docker build
  args in nightly image build so Dockerfile can authenticate to Gitee
- In Dockerfile.nightly.a2/a3, embed credentials into clone URL to
  avoid auth failure during `git clone`
- In single-node and multi-node PR test workflows, backup the
  pre-installed benchmark from the nightly image before wiping
  vllm-ascend, then restore it instead of re-cloning from Gitee,
  which is inaccessible from fork PR contexts

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.18.0
- vLLM main:
8b6325758c

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
2026-03-23 20:16:51 +08:00

236 lines
7.3 KiB
Bash

#!/bin/bash
set -euo pipefail
# Color definitions
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
RED="\033[0;31m"
NC="\033[0m" # No Color
# Configuration
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
# cann and atb environment setup
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/cann-8.5.1/share/info/ascendnpu-ir/bin/set_env.sh
set +eu
source /usr/local/Ascend/nnal/atb/set_env.sh
set -eu
# Home path for aisbench
export BENCHMARK_HOME=${WORKSPACE}/vllm-ascend/benchmark
# Logging configurations
export VLLM_LOGGING_LEVEL="INFO"
# Reduce glog verbosity for mooncake
export GLOG_minloglevel=1
# Set transformers to offline mode to avoid downloading models during tests
export HF_HUB_OFFLINE="1"
# Default is 600s
export VLLM_ENGINE_READY_TIMEOUT_S=1800
# Function to print section headers
print_section() {
echo -e "\n${BLUE}=== $1 ===${NC}"
}
print_failure() {
echo -e "${RED}${FAIL_TAG:-test_failed} ✗ ERROR: $1${NC}"
exit 1
}
# Function to print success messages
print_success() {
echo -e "${GREEN}$1${NC}"
}
# Function to print error messages and exit
print_error() {
echo -e "${RED}✗ ERROR: $1${NC}"
exit 1
}
show_vllm_info() {
cd "$WORKSPACE"
echo "Installed vLLM-related Python packages:"
pip list | grep vllm || echo "No vllm packages found."
echo ""
echo "============================"
echo "vLLM Git information"
echo "============================"
cd vllm
if [ -d .git ]; then
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
echo "Commit hash: $(git rev-parse HEAD)"
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
echo "Message: $(git log -1 --pretty=format:'%s')"
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
echo "Remote: $(git remote -v | head -n1)"
echo ""
else
echo "No .git directory found in vllm"
fi
cd ..
echo ""
echo "============================"
echo "vLLM-Ascend Git information"
echo "============================"
cd vllm-ascend
if [ -d .git ]; then
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
echo "Commit hash: $(git rev-parse HEAD)"
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
echo "Message: $(git log -1 --pretty=format:'%s')"
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
echo "Remote: $(git remote -v | head -n1)"
echo ""
else
echo "No .git directory found in vllm-ascend"
fi
cd ..
}
check_npu_info() {
echo "====> Check NPU info"
npu-smi info
cat "/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/ascend_toolkit_install.info"
}
check_and_config() {
echo "====> Configure mirrors and git proxy"
git config --global url."https://ghfast.top/https://github.com/".insteadOf "https://github.com/"
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
}
install_extra_components() {
echo "====> Installing extra components for DeepSeek-v3.2-exp-bf16"
if ! wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run; then
echo "Failed to download CANN-custom_ops-sfa-linux.aarch64.run"
return 1
fi
chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run
./CANN-custom_ops-sfa-linux.aarch64.run --quiet
if ! wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl; then
echo "Failed to download custom_ops wheel"
return 1
fi
pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl
export ASCEND_CUSTOM_OPP_PATH="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize${ASCEND_CUSTOM_OPP_PATH:+:${ASCEND_CUSTOM_OPP_PATH}}"
export LD_LIBRARY_PATH="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
source /usr/local/Ascend/ascend-toolkit/set_env.sh
rm -f CANN-custom_ops-sfa-linux.aarch64.run \
custom_ops-1.0-cp311-cp311-linux_aarch64.whl
echo "====> Extra components installation completed"
}
checkout_src() {
echo "====> Checkout source code"
mkdir -p "$WORKSPACE"
cd "$WORKSPACE"
pip uninstall -y vllm vllm-ascend || true
cp -r "$WORKSPACE/vllm-ascend/benchmark" /tmp/aisbench-backup || true
rm -rf "$WORKSPACE/vllm" "$WORKSPACE/vllm-ascend"
if [ ! -d "$WORKSPACE/vllm-ascend" ]; then
echo "Cloning vllm-ascend from $VLLM_ASCEND_REMOTE_URL"
git clone --depth 1 "$VLLM_ASCEND_REMOTE_URL" "$WORKSPACE/vllm-ascend"
cd "$WORKSPACE/vllm-ascend"
PR_REF=$(git ls-remote origin 'refs/pull/*/head' | grep "^${VLLM_ASCEND_REF}" | awk '{print $2}' | head -1)
if [ -n "$PR_REF" ]; then
git fetch --depth 1 origin "$PR_REF"
git checkout FETCH_HEAD
else
git fetch origin '+refs/pull/*/head:refs/remotes/pull/*' 2>/dev/null || true
git checkout "$VLLM_ASCEND_REF"
fi
fi
if [ ! -d "$WORKSPACE/vllm" ]; then
echo "Cloning vllm version/ref: $VLLM_VERSION"
git clone --depth 1 --branch "$VLLM_VERSION" https://github.com/vllm-project/vllm.git "$WORKSPACE/vllm"
fi
}
install_vllm() {
echo "====> Install vllm and vllm-ascend"
VLLM_TARGET_DEVICE=empty pip install -e "$WORKSPACE/vllm"
pip install -r "$WORKSPACE/vllm-ascend/requirements-dev.txt"
pip install -e "$WORKSPACE/vllm-ascend"
}
install_aisbench() {
echo "====> Install AISBench benchmark"
BENCH_DIR="$WORKSPACE/vllm-ascend/benchmark"
cp -r /tmp/aisbench-backup "$BENCH_DIR"
cd "$BENCH_DIR"
pip install -e . \
-r requirements/api.txt \
-r requirements/extra.txt
python3 -m pip cache purge || echo "WARNING: pip cache purge failed, but proceeding..."
}
show_triton_ascend_info() {
echo "====> Check triton ascend info"
clang -v
which bishengir-compile
pip show triton-ascend
}
kill_npu_processes() {
pgrep python3 | xargs -r kill -9
pgrep VLLM | xargs -r kill -9
sleep 4
}
run_tests_with_log() {
set +e
kill_npu_processes
pytest -sv --show-capture=no tests/e2e/nightly/multi_node/scripts/test_multi_node.py
ret=$?
set -e
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
if [ $ret -eq 0 ]; then
print_success "All tests passed!"
else
print_failure "Some tests failed, please check the error stack above for details. \
If this is insufficient to pinpoint the error, please download and review the logs of all other nodes from the job's summary."
fi
fi
}
main() {
check_npu_info
check_and_config
if [[ "$IS_PR_TEST" == "true" ]]; then
checkout_src
install_vllm
install_aisbench
fi
show_vllm_info
show_triton_ascend_info
if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then
install_extra_components
fi
cd "$WORKSPACE/vllm-ascend"
run_tests_with_log
}
main "$@"