Files
xc-llm-ascend/tests/e2e/nightly/multi_node/scripts/run.sh

156 lines
4.8 KiB
Bash
Raw Normal View History

#!/bin/bash
set -euo pipefail

# Color definitions (escape sequences interpreted by the print helpers below)
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
RED="\033[0;31m"
NC="\033[0m" # No Color

# Configuration
LOG_DIR="/root/.cache/tests/logs"
OVERWRITE_LOGS=true

# Prepend the Ascend toolkit Python packages to the library search path.
# The ${VAR:+:...} form is required because of `set -u`: a plain
# $LD_LIBRARY_PATH aborts the script when the variable is not set, and an
# empty value would leave a trailing ":" in the resulting path.
export LD_LIBRARY_PATH="/usr/local/Ascend/ascend-toolkit/latest/python/site-packages${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# WORKSPACE is supplied by the caller's environment; fail early with a clear
# message instead of bash's generic "unbound variable" error.
: "${WORKSPACE?WORKSPACE must be set to the directory containing the vllm and vllm-ascend checkouts}"
export BENCHMARK_HOME="${WORKSPACE}/vllm-ascend/benchmark"
export VLLM_LOGGING_LEVEL="DEBUG"
# Emit a section banner: an empty line followed by "=== <title> ===" in blue.
# Globals:   BLUE, NC (read)
# Arguments: $1 - section title (backslash escapes in it are expanded)
print_section() {
  local title=$1
  printf '%b\n' "\n${BLUE}=== ${title} ===${NC}"
}
# Print a failure message in red, prefixed with the FAIL_TAG marker when one
# is present, then terminate the script with exit status 1.
# Globals:   RED, NC (read); FAIL_TAG (read, optional)
# Arguments: $1 - description of the failure
print_failure() {
  # FAIL_TAG is not assigned in the visible part of this script (presumably it
  # comes from the job environment). Default it to empty so that `set -u`
  # does not replace the real failure message with an "unbound variable" error.
  echo -e "${RED}${FAIL_TAG:-} ✗ ERROR: $1${NC}"
  exit 1
}
# Emit a success message rendered in green.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
print_success() {
  local message=$1
  printf '%b\n' "${GREEN}${message}${NC}"
}
# Emit an error message rendered in red, then terminate with exit status 1.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
print_error() {
  local message=$1
  printf '%b\n' "${RED}✗ ERROR: ${message}${NC}"
  exit 1
}
# Print a titled block of Git metadata for one source checkout.
# Arguments:
#   $1 - project name used in the "<name> Git information" title
#   $2 - checkout directory, relative to the current working directory
# Outputs:   branch, commit, author, date, subject, tags and first remote, or
#            a notice when the directory has no .git subdirectory
_print_git_info() {
  local title=$1
  local repo_dir=$2
  local tags

  echo ""
  echo "============================"
  echo "${title} Git information"
  echo "============================"

  # Purely informational: a missing checkout is reported, not fatal.
  if [[ ! -d "${repo_dir}/.git" ]]; then
    echo "No .git directory found in ${repo_dir}"
    return 0
  fi

  # `git tag --points-at` exits 0 with empty output when HEAD is untagged, so
  # an `|| echo None` fallback never fires; substitute on empty output instead.
  tags="$(git -C "${repo_dir}" tag --points-at HEAD)" || true

  echo "Branch: $(git -C "${repo_dir}" rev-parse --abbrev-ref HEAD)"
  echo "Commit hash: $(git -C "${repo_dir}" rev-parse HEAD)"
  echo "Author: $(git -C "${repo_dir}" log -1 --pretty=format:'%an <%ae>')"
  echo "Date: $(git -C "${repo_dir}" log -1 --pretty=format:'%ad' --date=iso)"
  echo "Message: $(git -C "${repo_dir}" log -1 --pretty=format:'%s')"
  echo "Tags: ${tags:-None}"
  echo "Remote: $(git -C "${repo_dir}" remote -v | head -n1)"
  echo ""
}

# Print the installed vllm-related pip packages and the Git state of the
# vllm and vllm-ascend checkouts under $WORKSPACE.
# Globals:   WORKSPACE (read)
# Side effects: changes the working directory to $WORKSPACE and leaves it there
show_vllm_info() {
  cd "$WORKSPACE"
  echo "Installed vLLM-related Python packages:"
  # grep exits non-zero when nothing matches; report that instead of failing.
  pip list | grep vllm || echo "No vllm packages found."

  _print_git_info "vLLM" vllm
  _print_git_info "vLLM-Ascend" vllm-ascend
}
# Print the `npu-smi info` report followed by the contents of the Ascend
# toolkit install-info file for the platform reported by `uname -i`.
check_npu_info() {
  local toolkit_root="/usr/local/Ascend/ascend-toolkit/latest"

  printf '%s\n' "====> Check NPU info"
  npu-smi info
  cat "${toolkit_root}/$(uname -i)-linux/ascend_toolkit_install.info"
}
# Point git and pip at mirror/proxy endpoints.
# Side effects: writes the global git config (URL rewrite for github.com),
#               sets pip's global index-url, and exports PIP_EXTRA_INDEX_URL.
check_and_config() {
  local github_url="https://github.com/"
  local proxied_url="https://gh-proxy.test.osinfra.cn/${github_url}"
  local pypi_mirror="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"

  printf '%s\n' "====> Configure mirrors and git proxy"
  # Rewrite every github.com URL so that it is fetched through the proxy.
  git config --global "url.${proxied_url}.insteadOf" "${github_url}"
  pip config set global.index-url "${pypi_mirror}"
  export PIP_EXTRA_INDEX_URL="https://mirrors.huaweicloud.com/ascend/repos/pypi"
}
# Download and install the extra custom-op artifacts used for the
# DeepSeek-v3.2-exp-bf16 configuration, then extend the environment.
# Side effects: downloads two files into the current directory (removed
#               again afterwards), runs the downloaded .run installer,
#               pip-installs the wheel, exports ASCEND_CUSTOM_OPP_PATH and
#               LD_LIBRARY_PATH, and sources the toolkit's set_env.sh.
# Returns:   1 if either download fails.
install_extra_components() {
  local base_url="https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3"
  local installer="CANN-custom_ops-sfa-linux.aarch64.run"
  local wheel="custom_ops-1.0-cp311-cp311-linux_aarch64.whl"
  local customize_dir="/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize"

  echo "====> Installing extra components for DeepSeek-v3.2-exp-bf16"

  # -O pins the output name. Without it wget saves to "<name>.1" when a file
  # left over from an earlier failed run exists, and the stale copy would be
  # the one executed below.
  if ! wget -q -O "./${installer}" "${base_url}/${installer}"; then
    echo "Failed to download CANN-custom_ops-sfa-linux.aarch64.run"
    rm -f "./${installer}" # drop the partial download
    return 1
  fi
  chmod +x "./${installer}"
  "./${installer}" --quiet

  if ! wget -q -O "./${wheel}" "${base_url}/${wheel}"; then
    echo "Failed to download custom_ops wheel"
    rm -f "./${installer}" "./${wheel}"
    return 1
  fi
  pip install "${wheel}"

  # Prepend the custom-op locations, keeping any existing value after a ":".
  export ASCEND_CUSTOM_OPP_PATH="${customize_dir}${ASCEND_CUSTOM_OPP_PATH:+:${ASCEND_CUSTOM_OPP_PATH}}"
  export LD_LIBRARY_PATH="${customize_dir}/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  source /usr/local/Ascend/ascend-toolkit/set_env.sh

  rm -f "./${installer}" "./${wheel}"
  echo "====> Extra components installation completed"
}
# Send SIGKILL to every process that pgrep matches for "python3" and for
# "VLLM", then pause for four seconds.
kill_npu_processes() {
  local proc_name
  for proc_name in python3 VLLM; do
    # xargs -r skips running kill when pgrep produced no PIDs.
    pgrep "${proc_name}" | xargs -r kill -9
  done
  sleep 4
}
# Kill leftover processes, run the multi-node pytest suite, and report the
# outcome on the node whose LWS_WORKER_INDEX is 0.
# Globals:   LWS_WORKER_INDEX (read, optional)
# Outputs:   a success or failure message on worker index 0; nothing elsewhere
# Returns:   0 on other worker indexes regardless of the pytest result; on
#            index 0 a failing run ends the script through print_failure.
# Side effects: leaves `errexit` enabled when it returns.
run_tests_with_log() {
  local ret=0
  # Treat an unset or empty index as 0. Under `set -u` an unset variable
  # would otherwise abort here, after the tests ran, and discard their result.
  local worker_index="${LWS_WORKER_INDEX:-0}"

  # errexit is suspended so that the pytest status can be captured.
  set +e
  kill_npu_processes
  pytest -sv --show-capture=no tests/e2e/nightly/multi_node/test_multi_node.py
  ret=$?
  set -e

  if [ "$worker_index" -eq 0 ]; then
    if [ "$ret" -eq 0 ]; then
      print_success "All tests passed!"
    else
      print_failure "Some tests failed, please check the error stack above for details. If this is insufficient to pinpoint the error, please download and review the logs of all other nodes from the job's summary."
    fi
  fi
}
# Orchestrate the run: report NPU info, configure mirrors, print version
# information, optionally install the extra components, then run the tests
# from the vllm-ascend checkout.
# Globals:   CONFIG_YAML_PATH, WORKSPACE (read)
# Returns:   the status of run_tests_with_log
main() {
  check_npu_info
  check_and_config
  show_vllm_info

  # Only the DeepSeek-V3_2-Exp-bf16 configuration gets the extra components.
  case "$CONFIG_YAML_PATH" in
    *"DeepSeek-V3_2-Exp-bf16.yaml") install_extra_components ;;
  esac

  cd "$WORKSPACE/vllm-ascend"
  run_tests_with_log
}
# Script entry point: run main, forwarding all command-line arguments.
main "$@"