[bugfix][main]KV Pool for KV Transfer in PD Disaggregation Scenarios (#5398)
### What this PR does / why we need it?
1. Fix errors in the KV Pool used for KV transfer in PD (prefill/decode) disaggregation scenarios.
2. Update the KV Pool documentation.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: release/v0.13.0
- vLLM main:
254f6b9867
---------
Signed-off-by: fems14 <1804143737@qq.com>
This commit is contained in:
@@ -55,7 +55,7 @@ class AscendStoreConnector(KVConnectorBase_V1):
|
||||
)
|
||||
|
||||
assert self.connector_worker is not None
|
||||
if vllm_config.parallel_config.rank == 0 and self.kv_role != "kv_consumer":
|
||||
if vllm_config.parallel_config.rank == 0:
|
||||
self.lookup_server = LookupKeyServer(self.connector_worker,
|
||||
vllm_config,
|
||||
self.use_layerwise)
|
||||
|
||||
@@ -26,8 +26,7 @@ class KVPoolScheduler:
|
||||
"consumer_is_to_load", False)
|
||||
self.load_async = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
|
||||
"load_async", False)
|
||||
self.client = LookupKeyClient(
|
||||
vllm_config) if self.kv_role != "kv_consumer" else None
|
||||
self.client = LookupKeyClient(vllm_config)
|
||||
# request_id -> (vllm cached tokens, kvpool cached tokens)
|
||||
self.load_specs: dict[str, LoadSpec] = {}
|
||||
self.pcp_size = getattr(vllm_config.parallel_config,
|
||||
@@ -75,8 +74,8 @@ class KVPoolScheduler:
|
||||
else:
|
||||
token_len = len(request.prompt_token_ids)
|
||||
|
||||
num_external_hit_tokens = self.client.lookup( # type: ignore[union-attr]
|
||||
token_len, request.block_hashes)
|
||||
num_external_hit_tokens = self.client.lookup(token_len,
|
||||
request.block_hashes)
|
||||
|
||||
if num_external_hit_tokens == request.num_tokens:
|
||||
num_external_hit_tokens -= 1
|
||||
|
||||
Reference in New Issue
Block a user