[Auto Sync] Update model_config.py (20251014) (#11580)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Hanming Lu <69857889+hanming-lu@users.noreply.github.com>
This commit is contained in:
@@ -25,7 +25,7 @@ from transformers import PretrainedConfig
|
|||||||
from sglang.srt.environ import envs
|
from sglang.srt.environ import envs
|
||||||
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
|
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.utils import is_hip
|
from sglang.srt.utils import is_hip, retry
|
||||||
from sglang.srt.utils.hf_transformers_utils import (
|
from sglang.srt.utils.hf_transformers_utils import (
|
||||||
get_config,
|
get_config,
|
||||||
get_context_length,
|
get_context_length,
|
||||||
@@ -492,7 +492,16 @@ class ModelConfig:
|
|||||||
from huggingface_hub import HfApi, hf_hub_download
|
from huggingface_hub import HfApi, hf_hub_download
|
||||||
|
|
||||||
hf_api = HfApi()
|
hf_api = HfApi()
|
||||||
if hf_api.file_exists(self.model_path, "hf_quant_config.json"):
|
# Retry HF API call up to 3 times
|
||||||
|
file_exists = retry(
|
||||||
|
lambda: hf_api.file_exists(
|
||||||
|
self.model_path, "hf_quant_config.json"
|
||||||
|
),
|
||||||
|
max_retry=2,
|
||||||
|
initial_delay=1.0,
|
||||||
|
max_delay=5.0,
|
||||||
|
)
|
||||||
|
if file_exists:
|
||||||
# Download and parse the quantization config for remote models
|
# Download and parse the quantization config for remote models
|
||||||
quant_config_file = hf_hub_download(
|
quant_config_file = hf_hub_download(
|
||||||
repo_id=self.model_path,
|
repo_id=self.model_path,
|
||||||
@@ -506,7 +515,10 @@ class ModelConfig:
|
|||||||
logger.warning(
|
logger.warning(
|
||||||
"Offline mode is enabled, skipping hf_quant_config.json check"
|
"Offline mode is enabled, skipping hf_quant_config.json check"
|
||||||
)
|
)
|
||||||
pass
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Failed to check hf_quant_config.json: {self.model_path} {e}"
|
||||||
|
)
|
||||||
elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")):
|
elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")):
|
||||||
quant_config_file = os.path.join(
|
quant_config_file = os.path.join(
|
||||||
self.model_path, "hf_quant_config.json"
|
self.model_path, "hf_quant_config.json"
|
||||||
|
|||||||
Reference in New Issue
Block a user