[Feature] Simple Improvement to the Health Check Mechanism for Production-Grade Stability (#8115)
Signed-off-by: ybyang <ybyang7@iflytek.com>
This commit is contained in:
@@ -143,6 +143,7 @@ from sglang.srt.two_batch_overlap import TboDPAttentionPreparer
|
||||
from sglang.srt.utils import (
|
||||
DeepEPMode,
|
||||
DynamicGradMode,
|
||||
ServerStatus,
|
||||
broadcast_pyobj,
|
||||
configure_gc_logger,
|
||||
configure_logger,
|
||||
@@ -154,6 +155,7 @@ from sglang.srt.utils import (
|
||||
kill_itself_when_parent_died,
|
||||
point_to_point_pyobj,
|
||||
pyspy_dump_schedulers,
|
||||
report_health,
|
||||
require_mlp_sync,
|
||||
require_mlp_tp_gather,
|
||||
set_gpu_proc_affinity,
|
||||
@@ -2964,4 +2966,5 @@ def run_scheduler_process(
|
||||
except Exception:
|
||||
traceback = get_exception_traceback()
|
||||
logger.error(f"Scheduler hit an exception: {traceback}")
|
||||
report_health(ServerStatus.Crashed, server_args.host, ServerArgs.port)
|
||||
parent_process.send_signal(signal.SIGQUIT)
|
||||
|
||||
Reference in New Issue
Block a user