add env vars & misc
This commit is contained in:
@@ -58,7 +58,7 @@ def find_loaded_library(lib_name) -> Optional[str]:
|
||||
|
||||
camem_available = False
|
||||
try:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
from vllm_ascend.vllm_ascend_C import ( # type: ignore # noqa: F401
|
||||
init_module_offload as init_module,
|
||||
python_create_and_map_offload as python_create_and_map,python_unmap_and_release_offload as python_unmap_and_release,
|
||||
@@ -109,7 +109,7 @@ def get_pluggable_allocator(
|
||||
python_malloc_fn: Callable[[tuple[int, int, int, int]], None],
|
||||
python_free_func: Callable[[int], tuple[int, int, int, int]]
|
||||
) -> torch.npu.memory.NPUPluggableAllocator:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
current_device = torch.npu.current_device()
|
||||
init_module(python_malloc_fn, python_free_func, current_device)
|
||||
new_alloc = torch.npu.memory.NPUPluggableAllocator(
|
||||
@@ -281,7 +281,7 @@ class CaMemAllocator:
|
||||
# see https://github.com/pytorch/pytorch/issues/146431 .
|
||||
self.allocator_and_pools[tag] = data
|
||||
# lock gpu
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
self._vnpu_lock_gpu()
|
||||
yield
|
||||
# PyTorch's bug, calling torch.cuda.empty_cache() will error
|
||||
@@ -294,7 +294,7 @@ class CaMemAllocator:
|
||||
# allocate memory.
|
||||
# TODO: we need to find a way to release the memory,
|
||||
# i.e. calling torch.cuda.empty_cache()
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD:
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_VNPU:
|
||||
self.vnpu_unlock_gpu()
|
||||
self.current_tag = old_tag
|
||||
|
||||
@@ -321,12 +321,11 @@ class CaMemAllocator:
|
||||
return True
|
||||
time.sleep(0.001)
|
||||
|
||||
|
||||
def vnpu_unlock_gpu(self):
    """Invoke the ``python_unlock_gpu`` hook when one is available.

    Does nothing when ``python_unlock_gpu`` is falsy.
    """
    unlock_hook = python_unlock_gpu
    if not unlock_hook:
        return
    unlock_hook()
|
||||
|
||||
def get_pool_mem_info(self) -> tuple[int, int]:
    """Get available memory in the reserved pool.

    Returns:
        The value produced by ``python_get_mem_info()``, passed through
        unchanged.
        NOTE(review): annotated as a pair of ints; what each element
        represents is not visible from here -- confirm against the
        extension module that provides ``python_get_mem_info``.
    """
    return python_get_mem_info()
|
||||
|
||||
Reference in New Issue
Block a user