[main][Refactor] Remove with_prefill parameter from set_ascend_forward_context (#5094)
Removes the redundant `with_prefill` parameter from
`set_ascend_forward_context` to align the interface with vLLM's
`set_forward_context` for future refactoring.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: SlightwindSec <slightwindsec@gmail.com>
Signed-off-by: Slightwind <slightwindsec@gmail.com>
Co-authored-by: zzzzwwjj <34335947+zzzzwwjj@users.noreply.github.com>
This commit is contained in:
@@ -31,7 +31,6 @@ def set_ascend_forward_context(
|
||||
virtual_engine: int = 0,
|
||||
num_tokens: int = 0,
|
||||
num_tokens_across_dp: Optional[torch.Tensor] = None,
|
||||
- with_prefill: bool = True,
|
||||
in_profile_run: bool = False,
|
||||
num_actual_tokens: Optional[int] = None,
|
||||
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
|
||||
@@ -60,7 +59,6 @@ def set_ascend_forward_context(
|
||||
forward_context.moe_comm_type = moe_comm_type
|
||||
forward_context.moe_comm_method = get_moe_comm_method(moe_comm_type)
|
||||
|
||||
- forward_context.with_prefill = with_prefill
|
||||
tp_world_size = get_tensor_model_parallel_world_size()
|
||||
|
||||
forward_context.in_profile_run = in_profile_run
|
||||
|
||||
Reference in New Issue
Block a user