[refactor] Remove unnecessary attributes from set_ascend_forward_context (#5204)

### What this PR does / why we need it?

Remove unnecessary attributes from `set_ascend_forward_context`:

1. `prefetch_stream`
2. `weight_prefetch_method`

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

Signed-off-by: Wang Kunpeng <1289706727@qq.com>
2025-12-23 08:49:52 +08:00
parent 95e8a52156
commit c3a8d13ca7
10 changed files with 55 additions and 83 deletions
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -34,7 +34,7 @@ from vllm.logger import logger
 from vllm.sequence import IntermediateTensors

 import vllm_ascend.envs as envs_ascend
-from vllm_ascend.ascend_config import get_ascend_config
+from vllm_ascend.ascend_config import WeightPrefetchConfig, get_ascend_config

 if TYPE_CHECKING:
    from vllm.config import VllmConfig
@@ -52,6 +52,7 @@ ACL_FORMAT_FRACTAL_NZ = 29
 _CUSTOM_OP_ENABLED = None
 _CURRENT_STREAM = None
 _PREFETCH_STREAM = None
+_WEIGHT_PREFETCH_METHOD = None
 _GLOBAL_STREAM = None
 _SHARED_EXPERTS_CALCULATION_STREAM = None
 _ASCEND_CUSTOMOP_IS_REIGISTERED = False
@@ -309,6 +310,18 @@ def prefetch_stream() -> torch.npu.Stream:
    return _PREFETCH_STREAM


+def set_weight_prefetch_method(weight_prefetch_config: WeightPrefetchConfig):
+    global _WEIGHT_PREFETCH_METHOD
+    if _WEIGHT_PREFETCH_METHOD is None:
+        from vllm_ascend.ops.weight_prefetch import WeightPrefetchMethod
+        _WEIGHT_PREFETCH_METHOD = WeightPrefetchMethod(weight_prefetch_config)
+    return _WEIGHT_PREFETCH_METHOD
+
+
+def get_weight_prefetch_method():
+    return _WEIGHT_PREFETCH_METHOD
+
+
 def global_stream() -> torch.npu.Stream:
    global _GLOBAL_STREAM
    if _GLOBAL_STREAM is None: