Introduce naming convention in io_struct and base sglang io classes. (#10133)

This commit is contained in:
Liangsheng Yin
2025-10-03 10:55:13 +08:00
committed by GitHub
parent e810077488
commit 3c699772c9
10 changed files with 223 additions and 189 deletions

View File

@@ -78,6 +78,7 @@ from sglang.srt.managers.io_struct import (
 DestroyWeightsUpdateGroupReqInput,
 ExpertDistributionReq,
 ExpertDistributionReqOutput,
+ExpertDistributionReqType,
 FlushCacheReqInput,
 FlushCacheReqOutput,
 FreezeGCReq,
@@ -1487,12 +1488,12 @@ class Scheduler(
 req.priority = -sys.maxsize - 1
 elif not self.enable_priority_scheduling and req.priority is not None:
 abort_req = AbortReq(
-req.rid,
 finished_reason={
 "type": "abort",
 "status_code": HTTPStatus.SERVICE_UNAVAILABLE,
 "message": "Using priority is disabled for this server. Please send a new request without a priority.",
 },
+rid=req.rid,
 )
 self.send_to_tokenizer.send_pyobj(abort_req)
@@ -1528,12 +1529,12 @@ class Scheduler(
 self.send_to_tokenizer.send_pyobj(
 AbortReq(
-req_to_abort.rid,
 finished_reason={
 "type": "abort",
 "status_code": HTTPStatus.SERVICE_UNAVAILABLE,
 "message": message,
 },
+rid=req_to_abort.rid,
 )
 )
 return req_to_abort.rid == recv_req.rid
@@ -2005,7 +2006,7 @@ class Scheduler(
 self.new_token_ratio = new_token_ratio
 for req in reqs_to_abort:
 self.send_to_tokenizer.send_pyobj(
-AbortReq(req.rid, abort_reason=req.to_abort_message)
+AbortReq(abort_reason=req.to_abort_message, rid=req.rid)
 )
 logger.info(
@@ -2575,7 +2576,7 @@ class Scheduler(
 if self.enable_hicache_storage:
 # to release prefetch events associated with the request
 self.tree_cache.release_aborted_request(req.rid)
-self.send_to_tokenizer.send_pyobj(AbortReq(req.rid))
+self.send_to_tokenizer.send_pyobj(AbortReq(rid=req.rid))
 # For disaggregation decode mode, the request in the waiting queue has KV cache allocated.
 if self.disaggregation_mode == DisaggregationMode.DECODE:
 self.tree_cache.cache_finished_req(req)
@@ -2687,11 +2688,12 @@ class Scheduler(
 return SlowDownReqOutput()
 def expert_distribution_handle(self, recv_req: ExpertDistributionReq):
-if recv_req == ExpertDistributionReq.START_RECORD:
+action = recv_req.action
+if action == ExpertDistributionReqType.START_RECORD:
 get_global_expert_distribution_recorder().start_record()
-elif recv_req == ExpertDistributionReq.STOP_RECORD:
+elif action == ExpertDistributionReqType.STOP_RECORD:
 get_global_expert_distribution_recorder().stop_record()
-elif recv_req == ExpertDistributionReq.DUMP_RECORD:
+elif action == ExpertDistributionReqType.DUMP_RECORD:
 get_global_expert_distribution_recorder().dump_record()
 else:
 raise ValueError(f"Unrecognized ExpertDistributionReq value: {recv_req=}")
@@ -2774,7 +2776,8 @@ class IdleSleeper:
 def is_health_check_generate_req(recv_req):
-return getattr(recv_req, "rid", "").startswith("HEALTH_CHECK")
+rid = getattr(recv_req, "rid", None)
+return rid is not None and rid.startswith("HEALTH_CHECK")
 def is_work_request(recv_req):