[refactor] Remove unnecessary attributes from set_ascend_forward_context (#5204)
### What this PR does / why we need it?
Remove unnecessary attributes from set_ascend_forward_context:
1. prefetch_stream
2. weight_prefetch_method
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: Wang Kunpeng <1289706727@qq.com>
This commit is contained in:
@@ -34,7 +34,7 @@ from vllm.logger import logger
|
||||
from vllm.sequence import IntermediateTensors
|
||||
|
||||
import vllm_ascend.envs as envs_ascend
|
||||
from vllm_ascend.ascend_config import get_ascend_config
|
||||
from vllm_ascend.ascend_config import WeightPrefetchConfig, get_ascend_config
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.config import VllmConfig
|
||||
@@ -52,6 +52,7 @@ ACL_FORMAT_FRACTAL_NZ = 29
|
||||
_CUSTOM_OP_ENABLED = None
|
||||
_CURRENT_STREAM = None
|
||||
_PREFETCH_STREAM = None
|
||||
_WEIGHT_PREFETCH_METHOD = None
|
||||
_GLOBAL_STREAM = None
|
||||
_SHARED_EXPERTS_CALCULATION_STREAM = None
|
||||
_ASCEND_CUSTOMOP_IS_REIGISTERED = False
|
||||
@@ -309,6 +310,18 @@ def prefetch_stream() -> torch.npu.Stream:
|
||||
return _PREFETCH_STREAM
|
||||
|
||||
|
||||
def set_weight_prefetch_method(weight_prefetch_config: WeightPrefetchConfig):
    """Create the module-level weight prefetch method once and return it.

    On the first call a ``WeightPrefetchMethod`` is built from
    ``weight_prefetch_config`` and stored in ``_WEIGHT_PREFETCH_METHOD``.
    Every later call returns that stored instance unchanged; the
    configuration passed to those later calls is not used.

    Args:
        weight_prefetch_config: Configuration handed to
            ``WeightPrefetchMethod`` when the instance is first created.

    Returns:
        The module-level ``WeightPrefetchMethod`` instance.
    """
    global _WEIGHT_PREFETCH_METHOD
    # Already initialised: hand back the existing instance untouched.
    if _WEIGHT_PREFETCH_METHOD is not None:
        return _WEIGHT_PREFETCH_METHOD

    # Imported at call time rather than module import time
    # (presumably to avoid a circular import -- TODO confirm).
    from vllm_ascend.ops.weight_prefetch import WeightPrefetchMethod

    _WEIGHT_PREFETCH_METHOD = WeightPrefetchMethod(weight_prefetch_config)
    return _WEIGHT_PREFETCH_METHOD
|
||||
|
||||
|
||||
def get_weight_prefetch_method():
    """Return the module-level weight prefetch method.

    Returns:
        The value currently stored in ``_WEIGHT_PREFETCH_METHOD``. This is
        ``None`` until ``set_weight_prefetch_method`` has been called.
    """
    return _WEIGHT_PREFETCH_METHOD
|
||||
|
||||
|
||||
def global_stream() -> torch.npu.Stream:
|
||||
global _GLOBAL_STREAM
|
||||
if _GLOBAL_STREAM is None:
|
||||
|
||||
Reference in New Issue
Block a user