[main][Refactor] Remove with_prefill parameter from set_ascend_forward_context (#5094)

Removes the redundant `with_prefill` parameter from
`set_ascend_forward_context` to align the interface with vLLM's
`set_forward_context` for future refactoring.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: SlightwindSec <slightwindsec@gmail.com>
Signed-off-by: Slightwind <slightwindsec@gmail.com>
Co-authored-by: zzzzwwjj <34335947+zzzzwwjj@users.noreply.github.com>
This commit is contained in:
Slightwind
2025-12-23 14:30:50 +08:00
committed by GitHub
parent fa0c212bfa
commit 22138e2727
6 changed files with 22 additions and 21 deletions

View File

@@ -1424,7 +1424,6 @@ class NPUModelRunner(GPUModelRunner):
self.vllm_config,
num_tokens=num_input_tokens,
num_tokens_across_dp=num_tokens_across_dp,
with_prefill=self.with_prefill,
aclgraph_runtime_mode=aclgraph_runtime_mode,
batch_descriptor=batch_descriptor,
num_actual_tokens=scheduler_output.
@@ -2137,7 +2136,6 @@ class NPUModelRunner(GPUModelRunner):
self.vllm_config,
num_tokens=num_tokens_padded,
num_tokens_across_dp=num_tokens_across_dp,
with_prefill=with_prefill,
in_profile_run=is_profile,
num_actual_tokens=0,
aclgraph_runtime_mode=aclgraph_runtime_mode,