[Nightly] Optimize nightly CI (#4509)
### What this PR does / why we need it?

1. Optimize the multi-node waiting logic.
2. Remove the `tee` pipeline used for logging, which could cause the job to hang.

### How was this patch tested?

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.12.0

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -87,7 +87,7 @@ spec:
|
||||
- name: VLLM_ASCEND_REMOTE_URL
|
||||
value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
|
||||
- name: RESULT_FILE_PATH
|
||||
value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }}
|
||||
value: {{ result_file_path | default("/root/.cache/tests/ret") }}
|
||||
- name: FAIL_TAG
|
||||
value: {{ fail_tag | default("FAIL_TAG") }}
|
||||
command:
|
||||
|
||||
@@ -127,19 +127,14 @@ kill_npu_processes() {
|
||||
run_tests_with_log() {
|
||||
set +e
|
||||
kill_npu_processes
|
||||
BASENAME=$(basename "$CONFIG_YAML_PATH" .yaml)
|
||||
# each worker should have log file
|
||||
LOG_FILE="${RESULT_FILE_PATH}/${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
|
||||
mkdir -p ${RESULT_FILE_PATH}
|
||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py 2>&1 | tee $LOG_FILE
|
||||
ret=${PIPESTATUS[0]}
|
||||
pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py
|
||||
ret=$?
|
||||
set -e
|
||||
if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
|
||||
if [ $ret -eq 0 ]; then
|
||||
print_success "All tests passed!"
|
||||
else
|
||||
print_failure "Some tests failed!"
|
||||
mv LOG_FILE error_${LOG_FILE}
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user