[refactor] Remove unnecessary attributes from set_ascend_forward_context (#5204)
### What this PR does / why we need it?
Remove unnecessary attributes from `set_ascend_forward_context`:
1. `prefetch_stream`
2. `weight_prefetch_method`
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: Wang Kunpeng <1289706727@qq.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import math
|
||||
from contextlib import contextmanager
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
from vllm.config import CUDAGraphMode, VllmConfig
|
||||
@@ -16,11 +16,6 @@ from vllm_ascend.utils import (AscendDeviceType, enable_sp, flashcomm2_enable,
|
||||
get_ascend_device_type, has_layer_idx,
|
||||
is_moe_model)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm_ascend.ops.weight_prefetch import WeightPrefetchMethod
|
||||
else:
|
||||
WeightPrefetchMethod = None
|
||||
|
||||
|
||||
class MoECommType(Enum):
|
||||
ALLGATHER = 0
|
||||
@@ -41,9 +36,7 @@ def set_ascend_forward_context(
|
||||
num_actual_tokens: Optional[int] = None,
|
||||
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
|
||||
batch_descriptor: Optional[BatchDescriptor] = None,
|
||||
prefetch_stream: torch.npu.Stream = None,
|
||||
model_instance: torch.nn.Module = None,
|
||||
weight_prefetch_method: Optional[WeightPrefetchMethod] = None,
|
||||
is_mtp_model=False):
|
||||
"""A context manager that stores the current forward context,
|
||||
can be attention metadata, etc.
|
||||
@@ -116,13 +109,10 @@ def set_ascend_forward_context(
|
||||
forward_context.layer_idx is not None and \
|
||||
num_tokens is not None and num_tokens < 500
|
||||
if prefetch_mlp_enabled:
|
||||
forward_context.prefetch_stream = prefetch_stream
|
||||
forward_context.model_instance = model_instance
|
||||
forward_context.prefetch_mlp_gate_up_proj = False
|
||||
forward_context.prefetch_mlp_down_proj = False
|
||||
forward_context.prefetch_mlp_enabled = prefetch_mlp_enabled
|
||||
forward_context.model_instance = model_instance
|
||||
forward_context.weight_prefetch_method = weight_prefetch_method
|
||||
forward_context.is_mtp_model = is_mtp_model
|
||||
|
||||
if num_tokens is None and attn_metadata is not None:
|
||||
|
||||
Reference in New Issue
Block a user