Add env vars & misc cleanups (camem.py now checks VLLM_ASCEND_ENABLE_VNPU instead of VLLM_ASCEND_ENABLE_IDLE_OFFLOAD)

2026-02-11 06:27:58 +00:00
parent 739d074b0c
commit 389030a8f8
128 changed files with 89 additions and 59 deletions
--- a/vllm_ascend/device_allocator/camem.py
+++ b/vllm_ascend/device_allocator/camem.py
@@ -58,7 +58,7 @@ def find_loaded_library(lib_name) -> Optional[str]:

 camem_available = False
 try:
-    if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+    if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
        from vllm_ascend.vllm_ascend_C import (  # type: ignore # noqa: F401
            init_module_offload as init_module,
            python_create_and_map_offload as python_create_and_map,python_unmap_and_release_offload as python_unmap_and_release,
@@ -109,7 +109,7 @@ def get_pluggable_allocator(
    python_malloc_fn: Callable[[tuple[int, int, int, int]], None],
    python_free_func: Callable[[int], tuple[int, int, int, int]]
 ) -> torch.npu.memory.NPUPluggableAllocator:
-    if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+    if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
        current_device = torch.npu.current_device()
        init_module(python_malloc_fn, python_free_func, current_device)
        new_alloc = torch.npu.memory.NPUPluggableAllocator(
@@ -281,7 +281,7 @@ class CaMemAllocator:
            # see https://github.com/pytorch/pytorch/issues/146431 .
            self.allocator_and_pools[tag] = data
            # lock gpu
-            if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+            if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
                self._vnpu_lock_gpu()
            yield
            # PyTorch's bug, calling torch.cuda.empty_cache() will error
@@ -294,7 +294,7 @@ class CaMemAllocator:
            # allocate memory.
            # TODO: we need to find a way to release the memory,
            # i.e. calling torch.cuda.empty_cache()
-            if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
+            if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
                self.vnpu_unlock_gpu()
            self.current_tag = old_tag

@@ -321,12 +321,11 @@ class CaMemAllocator:
                return True
            time.sleep(0.001)

-
    def vnpu_unlock_gpu(self):
        if python_unlock_gpu:
            python_unlock_gpu()

-    def get_pool_mem_info(self) -> int:
+    def get_pool_mem_info(self) -> tuple[int, int]:
        """
        get available memory in reserved pool."""
        return python_get_mem_info()