From ba2ce28fe906fc18e6124afd9a5e1490ee123a4e Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Mon, 13 Oct 2025 22:16:34 -0700
Subject: [PATCH] [Auto Sync] Update model_config.py (20251014) (#11580)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Hanming Lu <69857889+hanming-lu@users.noreply.github.com>
---
 python/sglang/srt/configs/model_config.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py
index b67479f96..8cbbded9d 100644
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -25,7 +25,7 @@ from transformers import PretrainedConfig
 from sglang.srt.environ import envs
 from sglang.srt.layers.quantization import QUANTIZATION_METHODS
 from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import is_hip
+from sglang.srt.utils import is_hip, retry
 from sglang.srt.utils.hf_transformers_utils import (
     get_config,
     get_context_length,
@@ -492,7 +492,16 @@ class ModelConfig:
                     from huggingface_hub import HfApi, hf_hub_download
 
                     hf_api = HfApi()
-                    if hf_api.file_exists(self.model_path, "hf_quant_config.json"):
+                    # Try the HF API call, retrying on failure (max_retry=2)
+                    file_exists = retry(
+                        lambda: hf_api.file_exists(
+                            self.model_path, "hf_quant_config.json"
+                        ),
+                        max_retry=2,
+                        initial_delay=1.0,
+                        max_delay=5.0,
+                    )
+                    if file_exists:
                         # Download and parse the quantization config for remote models
                         quant_config_file = hf_hub_download(
                             repo_id=self.model_path,
@@ -506,7 +515,10 @@ class ModelConfig:
                     logger.warning(
                         "Offline mode is enabled, skipping hf_quant_config.json check"
                     )
-                    pass
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to check hf_quant_config.json: {self.model_path} {e}"
+                    )
             elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")):
                 quant_config_file = os.path.join(
                     self.model_path, "hf_quant_config.json"