diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 8b06d2cea..134effc5a 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -233,7 +233,10 @@ class ModelRunner: # Prepare the vllm model config monkey_patch_vllm_dummy_weight_loader() - self.load_config = LoadConfig(load_format=self.server_args.load_format) + self.load_config = LoadConfig( + load_format=self.server_args.load_format, + download_dir=self.server_args.download_dir, + ) self.vllm_model_config = VllmModelConfig( model=self.server_args.model_path, quantization=self.server_args.quantization, diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 7003d2c53..c20aac847 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -64,6 +64,7 @@ class ServerArgs: random_seed: Optional[int] = None constrained_json_whitespace_pattern: Optional[str] = None watchdog_timeout: float = 300 + download_dir: Optional[str] = None # Logging log_level: str = "info" @@ -405,6 +406,12 @@ class ServerArgs: default=ServerArgs.watchdog_timeout, help="Set watchdog timeout in seconds. If a forward batch takes longer than this, the server will crash to prevent hanging.", ) + parser.add_argument( + "--download-dir", + type=str, + default=ServerArgs.download_dir, + help="Model download directory.", + ) # Logging parser.add_argument(