Rename idle-offload mode to vNPU (VLLM_ASCEND_ENABLE_IDLE_OFFLOAD -> VLLM_ASCEND_ENABLE_VNPU); add env vars & misc

2026-02-11 06:27:58 +00:00
parent 739d074b0c
commit 389030a8f8
128 changed files with 89 additions and 59 deletions
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -258,7 +258,7 @@ class NPUWorker(WorkerBase):
        )
        return available_kv_cache_memory

-    def determine_available_memory_idle_offload_mode(self) -> int:
+    def determine_available_memory_vnpu_offload_mode(self) -> int:
        allocator = CaMemAllocator.get_instance()
        free, total = allocator.get_pool_mem_info()
        available_kv_cache_memory = int(
@@ -317,7 +317,7 @@ class NPUWorker(WorkerBase):
                "Sleep mode can only be "
                "used for one instance per process.")
            context = allocator.use_memory_pool(tag="weights")
-        elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+        elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
            if not sleep_mode_enabled():
                raise ValueError(
                    "Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
@@ -336,7 +336,7 @@ class NPUWorker(WorkerBase):
            context = nullcontext()  # type: ignore
        with context:
            self.model_runner.load_model()
-            if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+            if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
                # save memory to host with lock
                self.offload_vram()
                succ, _ = self.try_reload_vram()
@@ -402,7 +402,7 @@ class NPUWorker(WorkerBase):
        if self.vllm_config.model_config.enable_sleep_mode:
            allocator = CaMemAllocator.get_instance()
            context = allocator.use_memory_pool(tag="kv_cache")
-        elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+        elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
            allocator = CaMemAllocator.get_instance()
            context = allocator.use_memory_pool(tag="kv_cache")
        else: