From c0dbbdd12ba54953fe469d08096529090e980cbd Mon Sep 17 00:00:00 2001
From: sogalin <39478626+sogalin@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:53:14 +0800
Subject: [PATCH] [ROCm] Reduce compile time when using torch.compile
 (#10559)

---
 python/sglang/srt/model_executor/cuda_graph_runner.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 864ade8b3..81f0ce9e4 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -53,7 +53,9 @@ from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
 from sglang.srt.utils import (
     empty_context,
     get_available_gpu_memory,
+    get_bool_env_var,
     get_device_memory_capacity,
+    is_hip,
     log_info_on_rank0,
     require_attn_tp_gather,
     require_gathered_buffer,
@@ -61,6 +63,8 @@ from sglang.srt.utils import (
     require_mlp_tp_gather,
 )
 
+_is_hip = is_hip()
+
 logger = logging.getLogger(__name__)
 
 if TYPE_CHECKING:
@@ -137,7 +141,7 @@ def patch_model(
                 mode=os.environ.get(
                     "SGLANG_TORCH_COMPILE_MODE", "max-autotune-no-cudagraphs"
                 ),
-                dynamic=False,
+                dynamic=_is_hip and get_bool_env_var("SGLANG_TORCH_DYNAMIC_SHAPE"),
             )
         else:
             yield model.forward