Add feature: priority

Signed-off-by: Jing Wang <jingwang96@qq.com>
This commit is contained in:
Jing Wang
2026-05-12 11:51:57 +00:00
parent d627a45881
commit b6549b6e38
11 changed files with 382 additions and 66 deletions

View File

@@ -62,7 +62,10 @@ try:
python_create_and_map_offload as python_create_and_map,python_unmap_and_release_offload as python_unmap_and_release,
python_get_mem_info_offload as python_get_mem_info,
python_try_lock_gpu_offload as python_try_lock_gpu,
python_unlock_gpu_offload as python_unlock_gpu
python_unlock_gpu_offload as python_unlock_gpu,
python_start_wait_offload as python_start_wait,
python_cancel_wait_offload as python_cancel_wait,
python_has_higher_priority_waiter_offload as python_has_higher_priority_waiter
)
else:
from vllm_ascend.vllm_ascend_C import ( # type: ignore # noqa: F401
@@ -73,6 +76,9 @@ try:
python_get_mem_info = None
python_try_lock_gpu = None
python_unlock_gpu = None
python_start_wait = None
python_cancel_wait = None
python_has_higher_priority_waiter = None
lib_name = find_loaded_library("vllm_ascend_C")
camem_available = True
@@ -84,6 +90,9 @@ except ImportError as e:
python_get_mem_info = None
python_try_lock_gpu = None
python_unlock_gpu = None
python_start_wait = None
python_cancel_wait = None
python_has_higher_priority_waiter = None
lib_name = None
libcudart = None
@@ -306,15 +315,37 @@ class CaMemAllocator:
return False, False
# Block until the vNPU GPU lock is acquired; the only exit is `return True`.
# Polls self.vnpu_try_lock_gpu() in an endless loop and sleeps 1 ms after a
# failed attempt.
# NOTE(review): leading indentation is missing in this view, so the nesting
# described in the comments below is inferred from statement order -- confirm
# against the real file before relying on it.
def _vnpu_lock_gpu(self) -> bool:
# Remembers whether vnpu_start_wait() has already been called in this call.
is_waiting = False
while True:
# Only the first element of the try-lock result is used here.
success, _ = self.vnpu_try_lock_gpu()
if success:
# vnpu_start_wait() was called earlier, so call vnpu_cancel_wait() before
# returning.
if is_waiting:
self.vnpu_cancel_wait()
return True
else:
# vnpu_start_wait() is called at most once per call to this method.
if not is_waiting:
self.vnpu_start_wait()
is_waiting = True
# NOTE(review): unlock is issued after a *failed* try-lock, passing
# keep_wait=True. Presumably this releases any partially held state without
# dropping the waiter registration -- TODO confirm against the native
# extension, and confirm whether this runs on every failed attempt or only
# the first.
self.vnpu_unlock_gpu(keep_wait=True)
time.sleep(0.001)
def vnpu_unlock_gpu(self, keep_wait: bool = False) -> None:
    """Release the vNPU GPU lock through the native binding, if present.

    Args:
        keep_wait: Forwarded unchanged to ``python_unlock_gpu``. Defaults
            to ``False``, so existing zero-argument callers keep working.
            NOTE(review): presumably tells the native side to leave this
            process's waiter registration in place -- confirm against the
            C extension.

    Does nothing when ``python_unlock_gpu`` is falsy (the module binds it
    to ``None`` when the native binding is not available).
    """
    # Single call only: the old zero-argument call must not run as well,
    # otherwise the lock would be released twice.
    if python_unlock_gpu:
        python_unlock_gpu(keep_wait)
def vnpu_start_wait(self) -> None:
    """Call the native ``python_start_wait`` binding, if present.

    Does nothing when ``python_start_wait`` is falsy (the module binds it
    to ``None`` when the native binding is not available).
    NOTE(review): presumably registers this process as a waiter for the
    GPU lock -- confirm against the C extension.
    """
    if python_start_wait:
        python_start_wait()
def vnpu_cancel_wait(self) -> None:
    """Call the native ``python_cancel_wait`` binding, if present.

    Does nothing when ``python_cancel_wait`` is falsy (the module binds it
    to ``None`` when the native binding is not available).
    NOTE(review): presumably withdraws a waiter registration made by
    ``vnpu_start_wait`` -- confirm against the C extension.
    """
    if python_cancel_wait:
        python_cancel_wait()
def vnpu_has_higher_priority_waiter(self) -> bool:
    """Report the result of the native ``python_has_higher_priority_waiter``.

    Returns:
        The native binding's result coerced to ``bool``, or ``False`` when
        the binding is falsy (the module binds it to ``None`` when the
        native binding is not available).
    """
    if python_has_higher_priority_waiter:
        # Coerce so the declared return type holds even if the native call
        # hands back an int-like flag.
        return bool(python_has_higher_priority_waiter())
    return False
def get_pool_mem_info(self) -> tuple[int, int]:
"""