From 1645546661a9be7ee48eb3bb129a9d666650819f Mon Sep 17 00:00:00 2001 From: ChenCangtao <50493711+ChenCangtao@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:03:18 +0800 Subject: [PATCH] [bugfix][npugraph_ex]fix static kernel uninstall issue (#6128) ### What this PR does / why we need it? The static kernel in torch_npu is uninstalled through Python's atexit mechanism. However, in vllm-ascend, when inference ends or the service stops, the worker process is terminated directly. As a result, ending the process does not trigger the atexit mechanism, so the static kernel is never unloaded. When using the npugraph_ex backend with the static kernel enabled, we register a signal handler to explicitly unload the static kernel. When there are many static kernels, unloading usually takes some time, whereas vllm will directly kill the process after sending a terminate event. Therefore, we handle the unloading by starting a new, detached process. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? 
def uninstall_static_kernel(self):
    """Explicitly launch the torch_npu static-kernel uninstall script.

    torch_npu normally uninstalls its static kernels via Python's atexit
    hooks, but vllm terminates worker processes directly, so atexit never
    runs. This helper is invoked from the worker's SIGTERM/SIGINT handler
    instead. Because uninstalling many static kernels can be slow and vllm
    kills the worker shortly after sending the terminate event, the
    uninstall script runs in a detached child process that outlives this
    worker.

    Best-effort: returns silently when the Ascend toolkit environment,
    the uninstall script, or the file lock is unavailable.
    """
    import fcntl
    import os
    import subprocess

    # ASCEND_HOME_PATH may be unset in some deployments; during shutdown
    # a missing toolkit simply means there is nothing to uninstall.
    # (The original code indexed os.environ directly and could raise
    # KeyError from inside a signal handler.)
    ascend_home_path = os.environ.get("ASCEND_HOME_PATH")
    if not ascend_home_path:
        return

    static_kernel_dir_path = os.path.join(ascend_home_path, 'opp/static_kernel')
    uninstall_script_path = os.path.join(static_kernel_dir_path, 'ai_core/uninstall.sh')
    lock_file_path = os.path.join(static_kernel_dir_path, 'uninstall.lock')

    # No uninstall script means the static kernel was never installed.
    if not os.path.exists(uninstall_script_path):
        return

    with open(lock_file_path, 'w') as lock_fd:
        try:
            # Non-blocking exclusive lock: only one worker process gets to
            # spawn the uninstall script even when every worker receives
            # the termination signal at the same time.
            fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # BlockingIOError is a subclass of OSError, so this single
            # clause covers "lock already held" as well as other failures.
            # Another worker is handling the uninstall; nothing to do.
            return
        try:
            # start_new_session detaches the child so it survives this
            # worker being killed right after the terminate event.
            # pass_fds hands the locked descriptor to the child: an flock
            # belongs to the open file description, so the lock stays held
            # until the uninstall script itself exits. This serializes the
            # whole uninstall, not merely the spawn.
            subprocess.Popen(
                ['bash', uninstall_script_path],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,
                pass_fds=(lock_fd.fileno(),),
            )
        except OSError:
            return
    # NOTE: the lock file is intentionally left in place. The original
    # code unlocked and deleted it in a `finally` immediately after the
    # asynchronous spawn, which (a) released the lock while the uninstall
    # was still running and (b) raced with other processes opening the
    # same path, allowing two uninstalls to run concurrently.