Support colocating requests (#7973)
This commit is contained in:
@@ -123,6 +123,7 @@ from sglang.srt.managers.schedule_policy import (
|
||||
PrefillAdder,
|
||||
SchedulePolicy,
|
||||
)
|
||||
from sglang.srt.managers.scheduler_input_blocker import SchedulerInputBlocker
|
||||
from sglang.srt.managers.scheduler_output_processor_mixin import (
|
||||
SchedulerOutputProcessorMixin,
|
||||
)
|
||||
@@ -504,6 +505,12 @@ class Scheduler(
|
||||
)
|
||||
self.init_profier()
|
||||
|
||||
self.input_blocker = (
|
||||
SchedulerInputBlocker(noop=self.attn_tp_rank != 0)
|
||||
if get_bool_env_var("SGLANG_ENABLE_COLOCATED_BATCH_GEN")
|
||||
else None
|
||||
)
|
||||
|
||||
# Init metrics stats
|
||||
self.init_metrics(tp_rank, pp_rank, dp_rank)
|
||||
self.init_kv_events(server_args.kv_events_config)
|
||||
@@ -1035,6 +1042,9 @@ class Scheduler(
|
||||
else:
|
||||
recv_reqs = None
|
||||
|
||||
if self.input_blocker is not None:
|
||||
recv_reqs = self.input_blocker.handle(recv_reqs)
|
||||
|
||||
if self.server_args.enable_dp_attention:
|
||||
if self.attn_tp_rank == 0:
|
||||
work_reqs = [
|
||||
|
||||
Reference in New Issue
Block a user