Patch summary: in python/sglang/srt/model_executor/model_runner.py, lower the
module-level constant UNBALANCED_MODEL_LOADING_TIMEOUT_S from 36000 to 3600
(presumably seconds, per the _S suffix -- confirm against its use site).
Context lines are reproduced verbatim from the target file so the hunk applies.

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 601d36387..ce2a9c4c5 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -203,7 +203,7 @@ _is_xpu_xmx_available = xpu_has_xmx_support()
 SGLANG_CI_SMALL_KV_SIZE = os.getenv("SGLANG_CI_SMALL_KV_SIZE", None)
 
 # Detect stragger ranks in model loading
-UNBALANCED_MODEL_LOADING_TIMEOUT_S = 36000
+UNBALANCED_MODEL_LOADING_TIMEOUT_S = 3600
 
 # the ratio of mamba cache pool size to max_running_requests, it will be safe when it is larger than 2 (yizhang2077)
 MAMBA_CACHE_SIZE_MAX_RUNNING_REQUESTS_RATIO = 3