diff --git a/vllm_ascend/patch/platform/patch_core.py b/vllm_ascend/patch/platform/patch_core.py index 34a2884..d3e9bcd 100644 --- a/vllm_ascend/patch/platform/patch_core.py +++ b/vllm_ascend/patch/platform/patch_core.py @@ -1,6 +1,7 @@ import signal from typing import Optional from logging import DEBUG +import os import time from vllm.config import ParallelConfig, VllmConfig diff --git a/vllm_ascend/patch/platform/patch_executor.py b/vllm_ascend/patch/platform/patch_executor.py index 6a4170f..2c558b4 100644 --- a/vllm_ascend/patch/platform/patch_executor.py +++ b/vllm_ascend/patch/platform/patch_executor.py @@ -31,7 +31,7 @@ def reload_vram(self) -> bool: time_after_reload = time.perf_counter() self.is_offloaded = False logger.info(f"Reloading VRAM costs {time_after_reload - time_before_reload:.6f} seconds.") - return prev_is_self + return all(prev_is_self) def determine_available_memory_idle_offload_mode(self) -> int: