Support colocating requests (#7973)

This commit is contained in:
fzyzcjy
2025-07-29 13:51:49 +08:00
committed by GitHub
parent 59d0bf012f
commit 0ce84c822b
6 changed files with 179 additions and 6 deletions

View File

@@ -123,6 +123,7 @@ from sglang.srt.managers.schedule_policy import (
PrefillAdder,
SchedulePolicy,
)
from sglang.srt.managers.scheduler_input_blocker import SchedulerInputBlocker
from sglang.srt.managers.scheduler_output_processor_mixin import (
SchedulerOutputProcessorMixin,
)
@@ -504,6 +505,12 @@ class Scheduler(
)
self.init_profier()
# Optional input blocker: only constructed when the
# SGLANG_ENABLE_COLOCATED_BATCH_GEN environment flag is truthy; otherwise the
# attribute is None. The blocker is built with noop=True on every rank whose
# attn_tp_rank is not 0.
# NOTE(review): presumably only attention-TP rank 0 actually gates incoming
# requests and the other ranks pass them through — confirm against
# SchedulerInputBlocker.
self.input_blocker = (
SchedulerInputBlocker(noop=self.attn_tp_rank != 0)
if get_bool_env_var("SGLANG_ENABLE_COLOCATED_BATCH_GEN")
else None
)
# Init metrics stats
self.init_metrics(tp_rank, pp_rank, dp_rank)
self.init_kv_events(server_args.kv_events_config)
@@ -1035,6 +1042,9 @@ class Scheduler(
else:
recv_reqs = None
# When an input blocker is configured, pass the freshly received requests
# (possibly None at this point) through it and continue with whatever it returns.
# NOTE(review): presumably handle() may withhold or release requests — confirm
# against SchedulerInputBlocker.handle.
if self.input_blocker is not None:
recv_reqs = self.input_blocker.handle(recv_reqs)
if self.server_args.enable_dp_attention:
if self.attn_tp_rank == 0:
work_reqs = [