Return more info for computing average acceptance length (#3152)

This commit is contained in:
Lianmin Zheng
2025-01-26 04:51:54 -08:00
committed by GitHub
parent 7e0976133c
commit 1dda8c5e4c
10 changed files with 97 additions and 15 deletions

View File

@@ -57,6 +57,7 @@ from sglang.srt.utils import (
assert_pkg_version,
configure_logger,
kill_process_tree,
launch_dummy_health_check_server,
maybe_set_triton_cache_manager,
prepare_model_and_tokenizer,
set_prometheus_multiproc_dir,
@@ -400,14 +401,16 @@ def _launch_subprocesses(server_args: ServerArgs) -> Tuple[TokenizerManager, Dic
if os.getenv("SGLANG_BLOCK_NONZERO_RANK_CHILDREN") == "0":
# When using `Engine` as a Python API, we don't want to block here.
return
return None, None
launch_dummy_health_check_server(server_args.host, server_args.port)
for proc in scheduler_procs:
proc.join()
logger.error(
f"Scheduler or DataParallelController {proc.pid} terminated with {proc.exitcode}"
)
return
return None, None
# Launch detokenizer process
detoken_proc = mp.Process(