### What this PR does / why we need it?
Currently, our image build sometimes hits errors during cross-compilation,
which causes the image build to fail (see
https://github.com/vllm-project/vllm-ascend/actions/runs/20152861650/job/57849208186).
As a result, the nightly tests may run against code that is not the latest version.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
165 lines
5.0 KiB
Bash
165 lines
5.0 KiB
Bash
#!/bin/bash
#
# Driver for the nightly multi-node end-to-end test
# (tests/e2e/nightly/multi_node/test_multi_node.py).
#
# Environment variables read by this script:
#   WORKSPACE         directory that contains the vllm and vllm-ascend checkouts
#   CONFIG_YAML_PATH  a path ending in DeepSeek-V3_2-Exp-bf16.yaml triggers the
#                     installation of extra components
#   LWS_WORKER_INDEX  the pass/fail summary is printed only when this is 0
#   FAIL_TAG          prefix placed in front of the failure message
set -euo pipefail

# Color definitions (escape sequences, expanded by the print_* helpers)
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
RED="\033[0;31m"
NC="\033[0m" # No Color

# Configuration
# NOTE(review): LOG_DIR and OVERWRITE_LOGS are not referenced anywhere in the
# visible part of this script.
LOG_DIR="/root/.cache/tests/logs"
OVERWRITE_LOGS=true

# LD_LIBRARY_PATH may be unset or empty: expanding it bare would abort under
# `set -u`, and an empty value would leave a trailing ':' in the result.
# The ${VAR:+:...} form appends the old value only when there is one.
export LD_LIBRARY_PATH="/usr/local/Ascend/ascend-toolkit/latest/python/site-packages${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export BENCHMARK_HOME="${WORKSPACE}/vllm-ascend/benchmark"
# Print a section header: an empty line, then the text wrapped in
# "=== ... ===" and coloured with BLUE / NC.
#   $1 - header text (backslash escapes in it are expanded)
print_section() {
  local title="$1"
  printf '%b\n' "\n${BLUE}=== ${title} ===${NC}"
}
# Print a failure message in red, prefixed with FAIL_TAG, then terminate the
# script with status 1.
#   $1 - failure description
# FAIL_TAG is read from the environment and is not set anywhere in this
# script, so it is expanded with an empty default: under `set -u` a bare
# ${FAIL_TAG} would abort with "unbound variable" before the message is shown.
print_failure() {
  local message="$1"
  printf '%b\n' "${RED}${FAIL_TAG:-} ✗ ERROR: ${message}${NC}"
  exit 1
}
# Print a success message: a check mark followed by the text, coloured with
# GREEN / NC.
#   $1 - message text (backslash escapes in it are expanded)
print_success() {
  local message="$1"
  printf '%b\n' "${GREEN}✓ ${message}${NC}"
}
# Print an error message in red (RED / NC) and terminate the script with
# status 1.
#   $1 - error description (backslash escapes in it are expanded)
print_error() {
  local message="$1"
  printf '%b\n' "${RED}✗ ERROR: ${message}${NC}"
  exit 1
}
# Print a banner followed by the git state of one checkout.
#   $1 - title used in the banner ("<title> Git information")
#   $2 - checkout directory, relative to the current directory
# The git commands run in a subshell, so the caller's working directory is
# not changed. Returns non-zero when the directory cannot be entered.
_print_git_info() {
  local title="$1"
  local repo="$2"
  local tags

  echo ""
  echo "============================"
  echo "${title} Git information"
  echo "============================"
  (
    cd "${repo}" || exit 1
    if [[ ! -d .git ]]; then
      echo "No .git directory found in ${repo}"
      exit 0
    fi
    # `git tag --points-at` exits 0 with empty output when HEAD has no tag,
    # so an `|| echo None` fallback never fires; substitute on empty instead.
    tags="$(git tag --points-at HEAD)" || tags=""
    echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
    echo "Commit hash: $(git rev-parse HEAD)"
    echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
    echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
    echo "Message: $(git log -1 --pretty=format:'%s')"
    echo "Tags: ${tags:-None}"
    echo "Remote: $(git remote -v | head -n1)"
    echo ""
  )
}

# Print the installed vllm-related pip packages and the git state of the
# vllm and vllm-ascend checkouts under $WORKSPACE.
# Globals:  WORKSPACE (read)
# Side effect: the working directory is $WORKSPACE when the function returns.
show_vllm_info() {
  cd "$WORKSPACE"
  echo "Installed vLLM-related Python packages:"
  pip list | grep vllm || echo "No vllm packages found."

  _print_git_info "vLLM" vllm
  _print_git_info "vLLM-Ascend" vllm-ascend
}
# Print the NPU status reported by npu-smi and the contents of the installed
# Ascend toolkit's ascend_toolkit_install.info file.
check_npu_info() {
  local arch

  echo "====> Check NPU info"
  npu-smi info

  # `uname -i` is not portable across distributions and may print "unknown";
  # fall back to the machine name from `uname -m` in that case so the
  # "<arch>-linux" path component stays valid.
  arch="$(uname -i 2>/dev/null)" || arch=""
  if [[ -z "${arch}" || "${arch}" == "unknown" ]]; then
    arch="$(uname -m)"
  fi
  cat "/usr/local/Ascend/ascend-toolkit/latest/${arch}-linux/ascend_toolkit_install.info"
}
# Point git and pip at mirror servers:
#   - rewrite https://github.com/ URLs to go through the gh-proxy prefix
#     (global git config),
#   - set pip's global index URL,
#   - export PIP_EXTRA_INDEX_URL.
check_and_config() {
  local github_url="https://github.com/"
  local proxy_prefix="https://gh-proxy.test.osinfra.cn/"
  local pip_index="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"

  echo "====> Configure mirrors and git proxy"
  git config --global "url.${proxy_prefix}${github_url}.insteadOf" "${github_url}"
  pip config set global.index-url "${pip_index}"

  PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
  export PIP_EXTRA_INDEX_URL
}
# Download and install the extra custom-op packages: run the CANN custom-ops
# installer, pip-install the custom_ops wheel, extend ASCEND_CUSTOM_OPP_PATH
# and LD_LIBRARY_PATH, and source the toolkit's set_env.sh.
# Files are downloaded into the current directory and removed afterwards.
# Globals:  ASCEND_CUSTOM_OPP_PATH, LD_LIBRARY_PATH (read, extended, exported)
# Returns:  1 when a download fails.
install_extra_components() {
  local base_url="https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3"
  local run_pkg="CANN-custom_ops-sfa-linux.aarch64.run"
  local wheel="custom_ops-1.0-cp311-cp311-linux_aarch64.whl"

  echo "====> Installing extra components for DeepSeek-v3.2-exp-bf16"

  # -O pins the output name: without it wget saves to "<name>.1" when a file
  # from an earlier run is still present, and the stale copy would be the one
  # that gets executed below.
  if ! wget -q -O "${run_pkg}" "${base_url}/${run_pkg}"; then
    echo "Failed to download CANN-custom_ops-sfa-linux.aarch64.run"
    rm -f -- "${run_pkg}" # do not leave a partial download behind
    return 1
  fi
  chmod +x "./${run_pkg}"
  "./${run_pkg}" --quiet

  if ! wget -q -O "${wheel}" "${base_url}/${wheel}"; then
    echo "Failed to download custom_ops wheel"
    rm -f -- "${run_pkg}" "${wheel}"
    return 1
  fi
  pip install "${wheel}"

  # Prepend the custom-op locations; ${VAR:+:...} keeps the previous value
  # only when there is one, which is also safe under `set -u`.
  export ASCEND_CUSTOM_OPP_PATH="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize${ASCEND_CUSTOM_OPP_PATH:+:${ASCEND_CUSTOM_OPP_PATH}}"
  export LD_LIBRARY_PATH="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  source /usr/local/Ascend/ascend-toolkit/set_env.sh

  rm -f -- "${run_pkg}" "${wheel}"
  echo "====> Extra components installation completed"
}
# Best-effort cleanup: SIGKILL every process matched by `pgrep python3` and
# `pgrep VLLM`, then wait 4 seconds.
# pgrep exits 1 when nothing matches, which under `set -o pipefail` fails the
# whole pipeline and, with `set -e`, would abort the script. "Nothing to
# kill" is a normal outcome here, so the pipeline status is ignored on
# purpose and the function no longer depends on the caller disabling errexit.
kill_npu_processes() {
  pgrep python3 | xargs -r kill -9 || true
  pgrep VLLM | xargs -r kill -9 || true

  sleep 4
}
# Bring the vllm-ascend checkout under $WORKSPACE up to date with origin/main.
#
# FIXME(Potabk): remove this workaround once the image build is fixed.
# The separate per-architecture build currently suffers from errors during
# cross-compilation, which can make the image build fail. As a result the
# nightly test code in the image is not always the latest version.
#
# Globals:  WORKSPACE (read)
# Side effect: the working directory is $WORKSPACE/vllm-ascend afterwards.
upgrade_vllm_ascend_scr() {
  local checkout="$WORKSPACE/vllm-ascend"

  cd "${checkout}"
  git pull origin main
}
# Kill leftover processes, run the multi-node nightly pytest suite, and
# report the outcome.
# Only the node whose LWS_WORKER_INDEX is 0 prints the summary: on success it
# calls print_success, on failure print_failure (which exits with status 1).
# Other nodes return 0 regardless of the pytest result.
# Globals:  LWS_WORKER_INDEX (read; treated as 0 when unset or empty so that
#           the result is still reported instead of the script aborting with
#           "unbound variable" under `set -u` after the tests already ran)
run_tests_with_log() {
  local ret=0

  # Cleanup is best-effort; its status must not stop the test run.
  kill_npu_processes || true
  # Capture the status explicitly instead of toggling errexit around the call.
  pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py || ret=$?

  if [ "${LWS_WORKER_INDEX:-0}" -eq 0 ]; then
    if [ "${ret}" -eq 0 ]; then
      print_success "All tests passed!"
    else
      print_failure "Some tests failed!"
    fi
  fi
}
# Run the whole nightly flow in order: NPU check, mirror configuration,
# version report, optional extra components, source update, then the tests
# from inside the vllm-ascend checkout.
# Globals:  CONFIG_YAML_PATH (read), WORKSPACE (read)
main() {
  check_npu_info
  check_and_config
  show_vllm_info

  # Extra components are installed only for the DeepSeek-V3_2-Exp-bf16 config.
  case "$CONFIG_YAML_PATH" in
    *"DeepSeek-V3_2-Exp-bf16.yaml")
      install_extra_components
      ;;
  esac

  upgrade_vllm_ascend_scr
  cd "$WORKSPACE/vllm-ascend"
  run_tests_with_log
}
# Script entry point: run main with all command-line arguments forwarded.
main "$@"