Fix of DeepSeek Error in KV Pool Mixed Deployment Scenario (#3087)

### What this PR does / why we need it?
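A new kv_role, "kv_both", is added to support mixed deployment
scenarios. A "kv_both" node is handled like "kv_producer" when the KV
sending threads are started and when finished send requests are
collected. Mixed deployment also involves a decode phase, in which
with_prefill should be false, so the dummy run now forces
with_prefill to True only when the node is a KV producer and not also
a KV consumer.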

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.10.2
- vLLM main: c60e6137f0

Signed-off-by: fems14 <1804143737@qq.com>
This commit is contained in:
fems14
2025-09-22 20:36:41 +08:00
committed by GitHub
parent 37a0715eda
commit 1c9f0fe26f
3 changed files with 12 additions and 6 deletions

View File

@@ -119,7 +119,7 @@ class MooncakeEngine:
if self.use_layerwise:
self.get_event = threading.Event()
if self.kv_role == 'kv_producer':
if self.kv_role in ['kv_producer', 'kv_both']:
ready_event_sending = threading.Event()
self.kv_send_thread = KVCacheStoreLayerSendingThread(
self.tp_rank, self.tp_size, self.m_store,
@@ -135,7 +135,7 @@ class MooncakeEngine:
self.kv_recv_thread.start()
ready_event.wait()
else:
if self.kv_role == 'kv_producer':
if self.kv_role in ['kv_producer', 'kv_both']:
ready_event_sending = threading.Event()
self.kv_send_thread = KVCacheStoreSendingThread(
self.tp_rank, self.tp_size, self.m_store,
@@ -429,7 +429,7 @@ class MooncakeEngine:
done_sending = (
self.kv_send_thread.
get_and_clear_finished_requests( # type: ignore[union-attr]
) if self.kv_role == 'kv_producer' else set())
) if self.kv_role in ['kv_producer', 'kv_both'] else set())
done_recving = self.kv_recv_thread.get_and_clear_finished_requests( # type: ignore[union-attr]
)

View File

@@ -2406,7 +2406,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
dtype=np.int32)
# Force dummy run on prefill stage when this node is deemed as kv producer.
if self.is_kv_producer:
if self.is_kv_producer and not self.is_kv_consumer:
with_prefill = True
attn_metadata = self._build_attention_metadata(