[BugFix]GPQA Accuracy Issue Bugfix (#3476)
### What this PR does / why we need it?
In testing under the PD (prefill/decode) separation scenario, accuracy on the GPQA dataset is 33.2, which falls short of the 70 required by the paper. This PR resolves that accuracy issue.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
GPQA previously showed the accuracy issue described above; with this code change, the accuracy meets the standard.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: fjw <2270923832@qq.com>
This commit is contained in:
@@ -163,6 +163,8 @@ class MooncakeStoreConnectorV1Scheduler:
|
|||||||
self.client = MooncakeLookupClient(vllm_config)
|
self.client = MooncakeLookupClient(vllm_config)
|
||||||
self.use_layerwise = use_layerwise
|
self.use_layerwise = use_layerwise
|
||||||
self.kv_role = vllm_config.kv_transfer_config.kv_role
|
self.kv_role = vllm_config.kv_transfer_config.kv_role
|
||||||
|
self.consumer_is_to_load = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
|
||||||
|
"consumer_is_to_load", False)
|
||||||
# request_id -> (vllm cached tokes, mooncake cached tokens)
|
# request_id -> (vllm cached tokes, mooncake cached tokens)
|
||||||
self.load_specs: dict[str, LoadSpec] = {}
|
self.load_specs: dict[str, LoadSpec] = {}
|
||||||
self._block_size = vllm_config.cache_config.block_size
|
self._block_size = vllm_config.cache_config.block_size
|
||||||
@@ -192,6 +194,8 @@ class MooncakeStoreConnectorV1Scheduler:
|
|||||||
the number of tokens that can be loaded from the
|
the number of tokens that can be loaded from the
|
||||||
external KV cache beyond what is already computed.
|
external KV cache beyond what is already computed.
|
||||||
"""
|
"""
|
||||||
|
if self.kv_role == "kv_consumer" and not self.consumer_is_to_load:
|
||||||
|
return 0, False
|
||||||
|
|
||||||
if self._discard_partial_chunks:
|
if self._discard_partial_chunks:
|
||||||
token_block_end = len(request.prompt_token_ids
|
token_block_end = len(request.prompt_token_ids
|
||||||
|
|||||||
Reference in New Issue
Block a user