[BugFix]GPQA Accuracy Issue Bugfix (#3476)

### What this PR does / why we need it?
In the PD separation test scenario, accuracy on the GPQA dataset is
33.2, which does not meet the 70 required by the paper. This PR resolves
that accuracy issue.
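### Does this PR introduce _any_ user-facing change?
Yes. A new optional key `consumer_is_to_load` (default `False`) is read
from `kv_connector_extra_config`. When `kv_role` is `kv_consumer` and this
key is not enabled, the scheduler reports no externally loadable tokens
(it returns `0, False`).
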
### How was this patch tested?
GPQA previously showed the accuracy issue; with this code change, the
accuracy meets the standard.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: fjw <2270923832@qq.com>
This commit is contained in:
DreamerLeader
2025-10-15 23:28:17 +08:00
committed by GitHub
parent cec1fab509
commit aa6154703a

View File

@@ -163,6 +163,8 @@ class MooncakeStoreConnectorV1Scheduler:
self.client = MooncakeLookupClient(vllm_config) self.client = MooncakeLookupClient(vllm_config)
self.use_layerwise = use_layerwise self.use_layerwise = use_layerwise
self.kv_role = vllm_config.kv_transfer_config.kv_role self.kv_role = vllm_config.kv_transfer_config.kv_role
self.consumer_is_to_load = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
"consumer_is_to_load", False)
# request_id -> (vllm cached tokes, mooncake cached tokens) # request_id -> (vllm cached tokes, mooncake cached tokens)
self.load_specs: dict[str, LoadSpec] = {} self.load_specs: dict[str, LoadSpec] = {}
self._block_size = vllm_config.cache_config.block_size self._block_size = vllm_config.cache_config.block_size
@@ -192,6 +194,8 @@ class MooncakeStoreConnectorV1Scheduler:
the number of tokens that can be loaded from the the number of tokens that can be loaded from the
external KV cache beyond what is already computed. external KV cache beyond what is already computed.
""" """
if self.kv_role == "kv_consumer" and not self.consumer_is_to_load:
return 0, False
if self._discard_partial_chunks: if self._discard_partial_chunks:
token_block_end = len(request.prompt_token_ids token_block_end = len(request.prompt_token_ids
@@ -481,4 +485,4 @@ class MooncakeLookupServer:
def close(self): def close(self):
self.socket.close(linger=0) self.socket.close(linger=0)
# TODO: close the thread! # TODO: close the thread!