Add feature: priority

Signed-off-by: Jing Wang <jingwang96@qq.com>
This commit is contained in:
Jing Wang
2026-05-12 11:51:57 +00:00
parent d627a45881
commit b6549b6e38
11 changed files with 382 additions and 66 deletions

View File

@@ -470,7 +470,7 @@ class NPUWorker(WorkerBase):
# save memory to host with lock
self.offload_vram()
succ, _ = self.try_reload_vram()
assert succ, "Failed to reload model weights after offloading."
# assert succ, "Failed to reload model weights after offloading."
def offload_vram(self) -> None:
allocator = CaMemAllocator.get_instance()
@@ -480,9 +480,21 @@ class NPUWorker(WorkerBase):
allocator = CaMemAllocator.get_instance()
return allocator.try_reload_vram(tags=None)
def vnpu_unlock_gpu(self, keep_wait: bool = False) -> None:
    """Forward a vNPU unlock request to the memory allocator.

    Calls ``vnpu_unlock_gpu(keep_wait)`` on the object returned by
    ``CaMemAllocator.get_instance()``.

    Args:
        keep_wait: Passed through unchanged to the allocator. Defaults to
            ``False`` so callers that pass no argument keep working.
            NOTE(review): presumably leaves this worker queued as a
            waiter after the unlock -- confirm against CaMemAllocator.
    """
    allocator = CaMemAllocator.get_instance()
    allocator.vnpu_unlock_gpu(keep_wait)
def vnpu_start_wait(self) -> None:
    """Invoke ``vnpu_start_wait()`` on the memory allocator.

    Thin pass-through to the object returned by
    ``CaMemAllocator.get_instance()``; nothing is returned.
    NOTE(review): the wait semantics live in CaMemAllocator and are not
    visible here -- confirm there.
    """
    CaMemAllocator.get_instance().vnpu_start_wait()
def vnpu_cancel_wait(self) -> None:
    """Invoke ``vnpu_cancel_wait()`` on the memory allocator.

    Thin pass-through to the object returned by
    ``CaMemAllocator.get_instance()``; nothing is returned.
    NOTE(review): the cancel semantics live in CaMemAllocator and are not
    visible here -- confirm there.
    """
    CaMemAllocator.get_instance().vnpu_cancel_wait()
def vnpu_has_higher_priority_waiter(self) -> bool:
    """Return the allocator's ``vnpu_has_higher_priority_waiter()`` result.

    Thin pass-through to the object returned by
    ``CaMemAllocator.get_instance()``; the value is handed back unchanged.
    NOTE(review): how priority is compared is decided inside
    CaMemAllocator and is not visible here -- confirm there.
    """
    has_waiter = CaMemAllocator.get_instance().vnpu_has_higher_priority_waiter()
    return has_waiter
def compile_or_warm_up_model(self) -> float:
# Note: need to adapt for graph mode.