[feature]Pooling Features and PCP Adaptation (#4143)

This PR lets the pooling KV connector support the PCP feature.

- vLLM version: v0.11.2

---------

Signed-off-by: fjw <2270923832@qq.com>
Signed-off-by: SlightwindSec <slightwindsec@gmail.com>
Co-authored-by: SlightwindSec <slightwindsec@gmail.com>
This commit is contained in:
DreamerLeader
2025-11-29 22:07:45 +08:00
committed by GitHub
parent 1eb5295a1b
commit 4dbe4fd123
5 changed files with 89 additions and 29 deletions

View File

@@ -17,6 +17,10 @@ class KeyMetadata:
model_name: str
""" worker id when running under a distributed setting """
head_or_tp_rank: int
""" Initialize the current prefill context model parallel rank """
pcp_rank: int
""" Initialize the current decode context model parallel rank """
dcp_rank: int
@dataclass(order=True)
@@ -28,12 +32,15 @@ class PoolKey:
return hash((
self.key_metadata.model_name,
self.key_metadata.head_or_tp_rank,
self.key_metadata.pcp_rank,
self.key_metadata.dcp_rank,
self.chunk_hash,
))
def to_string(self):
    """Render this key as a single "@"-separated string.

    The segments are, in order: the model name, ``pcp<pcp_rank>``,
    ``dcp<dcp_rank>``, ``head_or_tp_rank:<head_or_tp_rank>`` and the
    chunk hash, all read from ``self.key_metadata`` / ``self.chunk_hash``.
    """
    meta = self.key_metadata
    segments = (
        f"{meta.model_name}",
        f"pcp{meta.pcp_rank}",
        f"dcp{meta.dcp_rank}",
        f"head_or_tp_rank:{meta.head_or_tp_rank}",
        f"{self.chunk_hash}",
    )
    return "@".join(segments)
@@ -60,6 +67,8 @@ class LayerPoolKey(PoolKey):
return hash((
self.key_metadata.model_name,
self.key_metadata.head_or_tp_rank,
self.key_metadata.pcp_rank,
self.key_metadata.dcp_rank,
self.chunk_hash,
self.layer_id,
))
@@ -67,6 +76,7 @@ class LayerPoolKey(PoolKey):
def to_string(self):
    """Render this layer-level key as a single "@"-separated string.

    The segments are, in order: the model name, ``pcp<pcp_rank>``,
    ``dcp<dcp_rank>``, ``head_or_tp_rank:<head_or_tp_rank>``, the chunk
    hash and finally the layer id.
    """
    meta = self.key_metadata
    segments = (
        f"{meta.model_name}",
        f"pcp{meta.pcp_rank}",
        f"dcp{meta.dcp_rank}",
        f"head_or_tp_rank:{meta.head_or_tp_rank}",
        f"{self.chunk_hash}",
        f"{self.layer_id}",
    )
    return "@".join(segments)