@@ -470,7 +470,7 @@ class NPUWorker(WorkerBase):
|
||||
# save memory to host with lock
|
||||
self.offload_vram()
|
||||
succ, _ = self.try_reload_vram()
|
||||
assert succ, "Failed to reload model weights after offloading."
|
||||
# assert succ, "Failed to reload model weights after offloading."
|
||||
|
||||
def offload_vram(self) -> None:
|
||||
allocator = CaMemAllocator.get_instance()
|
||||
@@ -480,9 +480,21 @@ class NPUWorker(WorkerBase):
|
||||
allocator = CaMemAllocator.get_instance()
|
||||
return allocator.try_reload_vram(tags=None)
|
||||
|
||||
def vnpu_unlock_gpu(self, keep_wait: bool = False) -> None:
    """Forward an unlock request to the CaMemAllocator instance.

    Args:
        keep_wait: Passed through unchanged to
            ``CaMemAllocator.vnpu_unlock_gpu``. Defaults to ``False`` so
            callers that use the older zero-argument form keep working.
            NOTE(review): presumably controls whether this worker stays
            registered as a waiter after unlocking -- confirm against the
            allocator implementation.
    """
    allocator = CaMemAllocator.get_instance()
    # Single call carrying the flag; the argument-less call is subsumed by
    # the keep_wait=False default.
    allocator.vnpu_unlock_gpu(keep_wait)
|
||||
|
||||
def vnpu_start_wait(self) -> None:
    """Delegate to ``vnpu_start_wait`` on the CaMemAllocator instance.

    Thin pass-through: obtains the allocator via
    ``CaMemAllocator.get_instance()`` and invokes its ``vnpu_start_wait``
    method. Nothing is returned.
    """
    CaMemAllocator.get_instance().vnpu_start_wait()
|
||||
|
||||
def vnpu_cancel_wait(self) -> None:
    """Delegate to ``vnpu_cancel_wait`` on the CaMemAllocator instance.

    Thin pass-through: obtains the allocator via
    ``CaMemAllocator.get_instance()`` and invokes its ``vnpu_cancel_wait``
    method. Nothing is returned.
    """
    CaMemAllocator.get_instance().vnpu_cancel_wait()
|
||||
|
||||
def vnpu_has_higher_priority_waiter(self) -> bool:
    """Return the allocator's answer to ``vnpu_has_higher_priority_waiter``.

    Thin pass-through: the value produced by the CaMemAllocator instance
    (obtained via ``CaMemAllocator.get_instance()``) is handed back to the
    caller unmodified.
    """
    has_waiter = CaMemAllocator.get_instance().vnpu_has_higher_priority_waiter()
    return has_waiter
|
||||
|
||||
def compile_or_warm_up_model(self) -> float:
|
||||
# Note: need to adapt for graph mode.
|
||||
|
||||
Reference in New Issue
Block a user