From c0dbbdd12ba54953fe469d08096529090e980cbd Mon Sep 17 00:00:00 2001 From: sogalin <39478626+sogalin@users.noreply.github.com> Date: Thu, 2 Oct 2025 14:53:14 +0800 Subject: [PATCH] [ROCm] Reduce compile time when using torch compile. (#10559) --- python/sglang/srt/model_executor/cuda_graph_runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 864ade8b3..81f0ce9e4 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -53,7 +53,9 @@ from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin from sglang.srt.utils import ( empty_context, get_available_gpu_memory, + get_bool_env_var, get_device_memory_capacity, + is_hip, log_info_on_rank0, require_attn_tp_gather, require_gathered_buffer, @@ -61,6 +63,8 @@ from sglang.srt.utils import ( require_mlp_tp_gather, ) +_is_hip = is_hip() + logger = logging.getLogger(__name__) if TYPE_CHECKING: @@ -137,7 +141,7 @@ def patch_model( mode=os.environ.get( "SGLANG_TORCH_COMPILE_MODE", "max-autotune-no-cudagraphs" ), - dynamic=False, + dynamic=_is_hip and get_bool_env_var("SGLANG_TORCH_DYNAMIC_SHAPE"), ) else: yield model.forward