[feature]Pooling Features and PCP Adaptation (#4143)
This PR lets the pooling KV connector support the PCP (prefill context parallel) feature. - vLLM version: v0.11.2 --------- Signed-off-by: fjw <2270923832@qq.com> Signed-off-by: SlightwindSec <slightwindsec@gmail.com> Co-authored-by: SlightwindSec <slightwindsec@gmail.com>
This commit is contained in:
@@ -17,6 +17,10 @@ class KeyMetadata:
|
||||
model_name: str
|
||||
""" worker id when running under a distributed setting """
|
||||
head_or_tp_rank: int
|
||||
""" Initialize the current prefill context model parallel rank """
|
||||
pcp_rank: int
|
||||
""" Initialize the current decode context model parallel rank """
|
||||
dcp_rank: int
|
||||
|
||||
|
||||
@dataclass(order=True)
|
||||
@@ -28,12 +32,15 @@ class PoolKey:
|
||||
return hash((
|
||||
self.key_metadata.model_name,
|
||||
self.key_metadata.head_or_tp_rank,
|
||||
self.key_metadata.pcp_rank,
|
||||
self.key_metadata.dcp_rank,
|
||||
self.chunk_hash,
|
||||
))
|
||||
|
||||
def to_string(self):
|
||||
return (
|
||||
f"{self.key_metadata.model_name}"
|
||||
f"@pcp{self.key_metadata.pcp_rank}@dcp{self.key_metadata.dcp_rank}"
|
||||
f"@head_or_tp_rank:{self.key_metadata.head_or_tp_rank}@{self.chunk_hash}"
|
||||
)
|
||||
|
||||
@@ -60,6 +67,8 @@ class LayerPoolKey(PoolKey):
|
||||
return hash((
|
||||
self.key_metadata.model_name,
|
||||
self.key_metadata.head_or_tp_rank,
|
||||
self.key_metadata.pcp_rank,
|
||||
self.key_metadata.dcp_rank,
|
||||
self.chunk_hash,
|
||||
self.layer_id,
|
||||
))
|
||||
@@ -67,6 +76,7 @@ class LayerPoolKey(PoolKey):
|
||||
def to_string(self):
|
||||
return (
|
||||
f"{self.key_metadata.model_name}"
|
||||
f"@pcp{self.key_metadata.pcp_rank}@dcp{self.key_metadata.dcp_rank}"
|
||||
f"@head_or_tp_rank:{self.key_metadata.head_or_tp_rank}@{self.chunk_hash}@{self.layer_id}"
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user