vllm-ascend vnpu v1
This commit is contained in:
44
vllm_ascend/patch/platform/patch_executor.py
Normal file
44
vllm_ascend/patch/platform/patch_executor.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import time
|
||||
|
||||
from vllm.executor.executor_base import logger, ExecutorBase
|
||||
|
||||
|
||||
# Keep a handle on the stock constructor so the wrapper below can delegate to it.
original_init = ExecutorBase.__init__


def init(self, *args, **kwargs):
    """Run the stock ``ExecutorBase.__init__`` and then initialise the offload flag.

    All positional and keyword arguments are forwarded untouched to the
    original constructor. Once it returns, ``is_offloaded`` is set to
    ``False``; ``offload_vram`` and ``reload_vram`` consult that flag to
    skip redundant calls.
    """
    original_init(self, *args, **kwargs)
    # The flag is assigned only after the original constructor has finished.
    self.is_offloaded = False
|
||||
|
||||
|
||||
def offload_vram(self):
    """Broadcast ``offload_vram`` to the workers unless already offloaded.

    When ``is_offloaded`` is already set, only a warning is logged.
    Otherwise the ``"offload_vram"`` RPC is issued through
    ``collective_rpc``, the flag is raised, and the wall-clock duration of
    the RPC is logged. Returns ``None`` in both cases.
    """
    if self.is_offloaded:
        logger.warning("Executor is already offloaded.")
    else:
        started_at = time.perf_counter()
        self.collective_rpc("offload_vram")
        elapsed = time.perf_counter() - started_at

        # The flag flips only after the RPC returned without raising.
        self.is_offloaded = True
        logger.info(f"Offloading VRAM costs {elapsed:.6f} seconds.")
|
||||
|
||||
|
||||
def reload_vram(self) -> bool:
    """Broadcast ``reload_vram`` to the workers and summarise their replies.

    Returns:
        ``True`` immediately (after logging a warning) when the executor is
        not offloaded. Otherwise ``True`` only if every worker's reply to
        the ``"reload_vram"`` RPC is truthy, and ``False`` as soon as one
        reply is falsy.

    NOTE(review): the per-worker reply is presumably the worker's
    "previous owner was self" flag -- confirm against the worker
    implementation.
    """
    if not self.is_offloaded:
        logger.warning("Executor is not offloaded.")
        return True

    time_before_reload = time.perf_counter()
    worker_replies = self.collective_rpc("reload_vram")
    time_after_reload = time.perf_counter()

    self.is_offloaded = False
    logger.info(f"Reloading VRAM costs {time_after_reload - time_before_reload:.6f} seconds.")
    # vLLM's collective_rpc returns a list with one result per worker.
    # Returning that list directly would break the declared ``bool``
    # contract, and a non-empty list is always truthy (even ``[False]``),
    # so collapse it to a single boolean here.
    return all(worker_replies)
|
||||
|
||||
|
||||
def determine_available_memory_idle_offload_mode(self) -> list[int]:
    """Forward ``determine_available_memory_idle_offload_mode`` to the workers.

    Returns:
        Whatever ``collective_rpc`` returns for that method, unchanged. In
        vLLM this is a list with one entry per worker, so the annotation is
        a list rather than a single ``int``.

    NOTE(review): each entry is presumably that worker's available memory
    in bytes -- confirm against the worker implementation.
    """
    return self.collective_rpc("determine_available_memory_idle_offload_mode")
|
||||
|
||||
|
||||
# Install the wrappers defined above on ExecutorBase. Each key is the
# attribute name that gets (re)bound on the class.
for _attr_name, _replacement in {
    "__init__": init,
    "offload_vram": offload_vram,
    "reload_vram": reload_vram,
    "determine_available_memory_idle_offload_mode": determine_available_memory_idle_offload_mode,
}.items():
    setattr(ExecutorBase, _attr_name, _replacement)
# Drop the loop variables so the module namespace gains no extra names.
del _attr_name, _replacement
|
||||
Reference in New Issue
Block a user