[CI] Optimize nightly CI (#3898)
### What this PR does / why we need it?
This patch mainly fix the the problem of not being able to determine the
exit status of the pod's entrypoint script and some other tiny
optimizations:
1. Shorten wait for server timeout
2. fix typo
3. fix the issue of ais_bench failing to correctly access the proxy URL
in a PD separation scenario.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -20,6 +20,11 @@ print_section() {
|
||||
echo -e "\n${BLUE}=== $1 ===${NC}"
|
||||
}
|
||||
|
||||
print_failure() {
|
||||
echo -e "${RED}${FAIL_TAG} ✗ ERROR: $1${NC}"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Function to print success messages
|
||||
print_success() {
|
||||
echo -e "${GREEN}✓ $1${NC}"
|
||||
@@ -161,32 +166,24 @@ kill_npu_processes() {
|
||||
sleep 4
|
||||
}
|
||||
|
||||
run_tests() {
|
||||
run_tests_with_log() {
|
||||
set +e
|
||||
kill_npu_processes
|
||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py
|
||||
ret=$?
|
||||
BASENAME=$(basename "$CONFIG_YAML_PATH" .yaml)
|
||||
# each worker should have log file
|
||||
LOG_FILE="${RESULT_FILE_PATH}/${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
|
||||
mkdir -p ${RESULT_FILE_PATH}
|
||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py 2>&1 | tee $LOG_FILE
|
||||
ret=${PIPESTATUS[0]}
|
||||
set -e
|
||||
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
|
||||
if [ $ret -eq 0 ]; then
|
||||
print_success "All tests passed!"
|
||||
else
|
||||
print_error "Some tests failed!"
|
||||
kubectl delete pod $CONTROLLER_NAME -n vllm-project
|
||||
print_failure "Some tests failed!"
|
||||
mv LOG_FILE error_${LOG_FILE}
|
||||
fi
|
||||
fi
|
||||
set -e
|
||||
}
|
||||
|
||||
install_kubectl() {
|
||||
arch=$(uname -m)
|
||||
KUBECTL=/root/.cache/.kube/kubectl
|
||||
if echo "$arch" | grep -qiE "arm|aarch64"; then
|
||||
echo "Detected ARM architecture: $arch"
|
||||
KUBECTL="$KUBECTL"_arm
|
||||
fi
|
||||
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
|
||||
echo "$SECRET" | base64 -d > /tmp/kubeconfig
|
||||
export KUBECONFIG=/tmp/kubeconfig
|
||||
}
|
||||
|
||||
main() {
|
||||
@@ -194,7 +191,6 @@ main() {
|
||||
check_and_config
|
||||
checkout_src
|
||||
install_sys_dependencies
|
||||
install_kubectl
|
||||
install_vllm
|
||||
install_ais_bench
|
||||
# to speed up mooncake build process, install Go here
|
||||
@@ -203,7 +199,7 @@ main() {
|
||||
. $SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh \
|
||||
pooling_async_memecpy_v1 9d96b2e1dd76cc601d76b1b4c5f6e04605cd81d3
|
||||
cd "$WORKSPACE/source_code/vllm-ascend"
|
||||
run_tests
|
||||
run_tests_with_log
|
||||
}
|
||||
|
||||
main "$@"
|
||||
|
||||
Reference in New Issue
Block a user