[PD] Add transfer backend abstraction (#5328)
This commit is contained in:
@@ -49,6 +49,7 @@ from sglang.srt.disaggregation.prefill import (
|
||||
from sglang.srt.disaggregation.utils import (
|
||||
DisaggregationMode,
|
||||
ReqToMetadataIdxAllocator,
|
||||
TransferBackend,
|
||||
)
|
||||
from sglang.srt.hf_transformers_utils import get_processor, get_tokenizer
|
||||
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
|
||||
@@ -530,6 +531,10 @@ class Scheduler(
|
||||
)
|
||||
|
||||
def init_disaggregation(self):
|
||||
self.transfer_backend = TransferBackend(
|
||||
self.server_args.disaggregation_transfer_backend
|
||||
)
|
||||
|
||||
if (
|
||||
self.disaggregation_mode == DisaggregationMode.DECODE
|
||||
): # *2 for the headroom.
|
||||
@@ -567,6 +572,7 @@ class Scheduler(
|
||||
tp_rank=self.tp_rank,
|
||||
tp_size=self.tp_size,
|
||||
bootstrap_port=self.server_args.disaggregation_bootstrap_port,
|
||||
transfer_backend=self.transfer_backend,
|
||||
)
|
||||
elif self.disaggregation_mode == DisaggregationMode.PREFILL:
|
||||
# *2 for the headroom.
|
||||
@@ -592,6 +598,7 @@ class Scheduler(
|
||||
tp_size=self.tp_size,
|
||||
bootstrap_port=self.server_args.disaggregation_bootstrap_port,
|
||||
gloo_group=self.tp_worker.get_attention_tp_cpu_group(),
|
||||
transfer_backend=self.transfer_backend,
|
||||
)
|
||||
# The prefill requests that are in the middle of kv sending
|
||||
self.disagg_prefill_inflight_queue: List[Req] = []
|
||||
|
||||
Reference in New Issue
Block a user