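Add env vars and misc changes (includes renaming VLLM_ASCEND_ENABLE_IDLE_OFFLOAD to VLLM_ASCEND_ENABLE_VNPU and determine_available_memory_idle_offload_mode to determine_available_memory_vnpu_offload_mode in NPUWorker)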

This commit is contained in:
starkwj
2026-02-11 06:27:58 +00:00
parent 739d074b0c
commit 389030a8f8
128 changed files with 89 additions and 59 deletions

View File

@@ -258,7 +258,7 @@ class NPUWorker(WorkerBase):
)
return available_kv_cache_memory
def determine_available_memory_idle_offload_mode(self) -> int:
def determine_available_memory_vnpu_offload_mode(self) -> int:
allocator = CaMemAllocator.get_instance()
free, total = allocator.get_pool_mem_info()
available_kv_cache_memory = int(
@@ -317,7 +317,7 @@ class NPUWorker(WorkerBase):
"Sleep mode can only be "
"used for one instance per process.")
context = allocator.use_memory_pool(tag="weights")
elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
if not sleep_mode_enabled():
raise ValueError(
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
@@ -336,7 +336,7 @@ class NPUWorker(WorkerBase):
context = nullcontext() # type: ignore
with context:
self.model_runner.load_model()
if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
# save memory to host with lock
self.offload_vram()
succ, _ = self.try_reload_vram()
@@ -402,7 +402,7 @@ class NPUWorker(WorkerBase):
if self.vllm_config.model_config.enable_sleep_mode:
allocator = CaMemAllocator.get_instance()
context = allocator.use_memory_pool(tag="kv_cache")
elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
allocator = CaMemAllocator.get_instance()
context = allocator.use_memory_pool(tag="kv_cache")
else: