Files
xc-llm-ascend/tests/e2e/nightly/multi_node/scripts/run.sh
Li Wang 7294f89e43 [CI] Add daily images build for nightly ci (#3989)
### What this PR does / why we need it?
Given the current excessively long build time of our nightly-ci, I
recommend installing necessary, confirmed versions of packages in the
Docker image to reduce the time required for integration testing.
Including Mooncake vllm with fixed tags, This is expected to reduce
nightly-ci duration by 2 hours.

- vLLM version: v0.11.0
- vLLM main:
2918c1b49c

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
2025-11-13 20:10:12 +08:00

131 lines
3.6 KiB
Bash

#!/bin/bash
set -euo pipefail
# Color definitions
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
RED="\033[0;31m"
NC="\033[0m" # No Color
# Configuration
LOG_DIR="/root/.cache/tests/logs"
OVERWRITE_LOGS=true
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
export BENCHMARK_HOME=${WORKSPACE}/vllm-ascend/benchmark
# Function to print section headers
print_section() {
echo -e "\n${BLUE}=== $1 ===${NC}"
}
print_failure() {
echo -e "${RED}${FAIL_TAG} ✗ ERROR: $1${NC}"
exit 1
}
# Function to print success messages
print_success() {
echo -e "${GREEN}$1${NC}"
}
# Function to print error messages and exit
print_error() {
echo -e "${RED}✗ ERROR: $1${NC}"
exit 1
}
show_vllm_info() {
cd "$WORKSPACE"
echo "Installed vLLM-related Python packages:"
pip list | grep vllm || echo "No vllm packages found."
echo ""
echo "============================"
echo "vLLM Git information"
echo "============================"
cd vllm
if [ -d .git ]; then
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
echo "Commit hash: $(git rev-parse HEAD)"
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
echo "Message: $(git log -1 --pretty=format:'%s')"
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
echo "Remote: $(git remote -v | head -n1)"
echo ""
else
echo "No .git directory found in vllm"
fi
cd ..
echo ""
echo "============================"
echo "vLLM-Ascend Git information"
echo "============================"
cd vllm-ascend
if [ -d .git ]; then
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
echo "Commit hash: $(git rev-parse HEAD)"
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
echo "Message: $(git log -1 --pretty=format:'%s')"
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
echo "Remote: $(git remote -v | head -n1)"
echo ""
else
echo "No .git directory found in vllm-ascend"
fi
cd ..
}
check_npu_info() {
echo "====> Check NPU info"
npu-smi info
cat "/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/ascend_toolkit_install.info"
}
check_and_config() {
echo "====> Configure mirrors and git proxy"
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf "https://github.com/"
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
}
kill_npu_processes() {
pgrep python3 | xargs -r kill -9
pgrep VLLM | xargs -r kill -9
sleep 4
}
run_tests_with_log() {
set +e
kill_npu_processes
BASENAME=$(basename "$CONFIG_YAML_PATH" .yaml)
# each worker should have log file
LOG_FILE="${RESULT_FILE_PATH}/${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
mkdir -p ${RESULT_FILE_PATH}
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py 2>&1 | tee $LOG_FILE
ret=${PIPESTATUS[0]}
set -e
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
if [ $ret -eq 0 ]; then
print_success "All tests passed!"
else
print_failure "Some tests failed!"
mv LOG_FILE error_${LOG_FILE}
fi
fi
}
main() {
check_npu_info
check_and_config
show_vllm_info
cd "$WORKSPACE/vllm-ascend"
run_tests_with_log
}
main "$@"