[feature]Pooling Features and PCP Adaptation (#4143)

This PR let pooling kv connector support pcp feature

- vLLM version: v0.11.2

---------

Signed-off-by: fjw <2270923832@qq.com>
Signed-off-by: SlightwindSec <slightwindsec@gmail.com>
Co-authored-by: SlightwindSec <slightwindsec@gmail.com>
This commit is contained in:
DreamerLeader
2025-11-29 22:07:45 +08:00
committed by GitHub
parent 1eb5295a1b
commit 4dbe4fd123
5 changed files with 89 additions and 29 deletions

View File

@@ -29,7 +29,14 @@ class KVPoolScheduler:
"load_async", False)
# request_id -> (vllm cached tokes, kvpool cached tokens)
self.load_specs: dict[str, LoadSpec] = {}
self.pcp_size = vllm_config.parallel_config.prefill_context_parallel_size
self.dcp_size = vllm_config.parallel_config.decode_context_parallel_size
self._block_size = vllm_config.cache_config.block_size
if self.pcp_size > 1:
self._block_size *= self.pcp_size
if self.dcp_size > 1:
self._block_size *= self.dcp_size
# request_id -> full_token_ids
self._request_trackers: dict[str, RequestTracker] = {}
# Whether to discard partial chunks