[CI] Fix nightly test and raise better error message (#2626)

Co-authored-by: Sangbin <rkooo567@gmail.com>
This commit is contained in:
Lianmin Zheng
2024-12-27 22:16:39 -08:00
committed by GitHub
parent 9254a33ad4
commit 855d0ba381
3 changed files with 16 additions and 2 deletions

View File

@@ -30,5 +30,5 @@ jobs:
- name: Run test
timeout-minutes: 10
run: |
cd test/lang
cd test/srt
python3 run_suite.py --suite nightly --timeout-per-file 2400

View File

@@ -484,7 +484,16 @@ def launch_engine(
# Wait for model to finish loading
scheduler_infos = []
for i in range(len(scheduler_pipe_readers)):
data = scheduler_pipe_readers[i].recv()
try:
data = scheduler_pipe_readers[i].recv()
except EOFError as e:
logger.exception(e)
logger.error(
f"Rank {i} scheduler is dead. Please check if there are relevant logs."
)
scheduler_procs[i].join()
logger.error(f"Exit code: {scheduler_procs[i].exitcode}")
raise
if data["status"] != "ready":
raise RuntimeError(

View File

@@ -44,11 +44,16 @@ suites = {
"test_vision_openai_server.py",
"test_session_control.py",
],
"nightly": [
"test_nightly_gsm8k_eval.py",
"test_nightly_human_eval.py",
],
"sampling/penaltylib": glob.glob(
"sampling/penaltylib/**/test_*.py", recursive=True
),
}
# Expand suite
for target_suite_name, target_tests in suites.items():
for suite_name, tests in suites.items():
if suite_name == target_suite_name: