From c726d44cc7c0662b13f6edb6a259a338efa9df7a Mon Sep 17 00:00:00 2001 From: harrisonlimh <97203667+harrisonlimh@users.noreply.github.com> Date: Sun, 19 Oct 2025 19:50:03 -0700 Subject: [PATCH] Recapture cuda graph after model weight update to resolve IMA error (#11780) --- python/sglang/srt/model_executor/model_runner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 6fce4cda4..0c6407130 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -981,6 +981,10 @@ class ModelRunner: self.server_args.load_format = load_format self.load_config = load_config + # Recapture device graph after model weight update. + if not self.server_args.disable_cuda_graph and self.device == "cuda": + self.init_device_graphs() + logger.info("Update weights end.") return True, "Succeeded to update model weights."