diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py index f16442e4d..0dbe37aa0 100644 --- a/python/sglang/srt/configs/model_config.py +++ b/python/sglang/srt/configs/model_config.py @@ -420,11 +420,20 @@ class ModelConfig: is_local = os.path.exists(self.model_path) modelopt_quant_config = {"quant_method": "modelopt"} if not is_local: - from huggingface_hub import HfApi + import huggingface_hub + + try: + from huggingface_hub import HfApi + + hf_api = HfApi() + if hf_api.file_exists(self.model_path, "hf_quant_config.json"): + quant_cfg = modelopt_quant_config + except huggingface_hub.errors.OfflineModeIsEnabled: + logger.warning( + "Offline mode is enabled, skipping hf_quant_config.json check" + ) + pass - hf_api = HfApi() - if hf_api.file_exists(self.model_path, "hf_quant_config.json"): - quant_cfg = modelopt_quant_config elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")): quant_config_file = os.path.join( self.model_path, "hf_quant_config.json"