add env vars & misc
This commit is contained in:
@@ -258,7 +258,7 @@ class NPUWorker(WorkerBase):
|
||||
)
|
||||
return available_kv_cache_memory
|
||||
|
||||
def determine_available_memory_idle_offload_mode(self) -> int:
|
||||
def determine_available_memory_vnpu_offload_mode(self) -> int:
|
||||
allocator = CaMemAllocator.get_instance()
|
||||
free, total = allocator.get_pool_mem_info()
|
||||
available_kv_cache_memory = int(
|
||||
@@ -317,7 +317,7 @@ class NPUWorker(WorkerBase):
|
||||
"Sleep mode can only be "
|
||||
"used for one instance per process.")
|
||||
context = allocator.use_memory_pool(tag="weights")
|
||||
elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
if not sleep_mode_enabled():
|
||||
raise ValueError(
|
||||
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
|
||||
@@ -336,7 +336,7 @@ class NPUWorker(WorkerBase):
|
||||
context = nullcontext() # type: ignore
|
||||
with context:
|
||||
self.model_runner.load_model()
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
# save memory to host with lock
|
||||
self.offload_vram()
|
||||
succ, _ = self.try_reload_vram()
|
||||
@@ -402,7 +402,7 @@ class NPUWorker(WorkerBase):
|
||||
if self.vllm_config.model_config.enable_sleep_mode:
|
||||
allocator = CaMemAllocator.get_instance()
|
||||
context = allocator.use_memory_pool(tag="kv_cache")
|
||||
elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
allocator = CaMemAllocator.get_instance()
|
||||
context = allocator.use_memory_pool(tag="kv_cache")
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user