[KVPOOl]Support pp (#4761)
### What this PR does / why we need it?
Support pp for kv pool
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: baxingpiaochong <771405853@qq.com>
This commit is contained in:
@@ -2450,6 +2450,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
|
||||
attn_metadata, self.with_prefill, maybe_padded_num_tokens,
|
||||
input_ids, positions, intermediate_tensors, inputs_embeds)
|
||||
|
||||
self.maybe_wait_for_kv_save()
|
||||
finished_sending, finished_recving = self.get_finished_kv_transfer(
|
||||
scheduler_output)
|
||||
|
||||
@@ -2711,7 +2712,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
|
||||
# ngram and other speculative decoding methods use the sampled
|
||||
# tokens on the CPU, so they are run after bookkeeping.
|
||||
propose_draft_token_ids(valid_sampled_token_ids)
|
||||
self.maybe_wait_for_kv_save()
|
||||
|
||||
if has_kv_transfer_group():
|
||||
get_kv_transfer_group().clear_connector_metadata()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user