[PD] Support page size > 1 (#5561)

This commit is contained in:
Byron Hsu
2025-04-19 21:54:27 -07:00
committed by GitHub
parent 20f1c8e374
commit ab4b5606e4
4 changed files with 58 additions and 9 deletions

View File

@@ -31,6 +31,8 @@ from sglang.srt.disaggregation.utils import (
ReqToMetadataIdxAllocator,
TransferBackend,
get_kv_class,
kv_to_page_indices,
kv_to_page_num,
poll_and_all_reduce,
)
from sglang.srt.managers.schedule_batch import FINISH_LENGTH, Req, ScheduleBatch
@@ -154,7 +156,8 @@ class PrefillBootstrapQueue:
self.req_to_metadata_buffer_idx_allocator.alloc()
)
assert req.metadata_buffer_index is not None
req.disagg_kv_sender.init(num_kv_indices, req.metadata_buffer_index)
num_pages = kv_to_page_num(num_kv_indices, self.token_to_kv_pool.page_size)
req.disagg_kv_sender.init(num_pages, req.metadata_buffer_index)
bootstrapped_reqs.append(req)
indices_to_remove.add(i)
@@ -300,4 +303,7 @@ class SchedulerDisaggregationPrefillMixin:
req.metadata_buffer_index, token_id
)
is_last = token_id is not None
req.disagg_kv_sender.send(kv_indices, slice(start_idx, end_idx), is_last)
page_indices = kv_to_page_indices(
kv_indices, self.token_to_kv_pool_allocator.page_size
)
req.disagg_kv_sender.send(page_indices, slice(start_idx, end_idx), is_last)