diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 3e0a8945d..f9601c9ac 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -658,6 +658,12 @@ class Scheduler( def launch_draft_worker( self, gpu_id, tp_rank, moe_ep_rank, server_args, port_args, dp_rank ): + if server_args.speculative_draft_load_format is not None: + server_args.load_format = server_args.speculative_draft_load_format + logger.info( + f"Using draft model load_format: '{server_args.speculative_draft_load_format}'" + ) + if self.spec_algorithm.is_eagle(): from sglang.srt.speculative.eagle_worker import EAGLEWorker from sglang.srt.speculative.eagle_worker_v2 import EAGLEWorkerV2 diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 07a122f65..b777eebb7 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -325,6 +325,7 @@ class ServerArgs: speculative_algorithm: Optional[str] = None speculative_draft_model_path: Optional[str] = None speculative_draft_model_revision: Optional[str] = None + speculative_draft_load_format: Optional[str] = None speculative_num_steps: Optional[int] = None speculative_eagle_topk: Optional[int] = None speculative_num_draft_tokens: Optional[int] = None @@ -2223,6 +2224,15 @@ class ServerArgs: "name, a tag name, or a commit id. If unspecified, will use " "the default version.", ) + parser.add_argument( + "--speculative-draft-load-format", + type=str, + default=ServerArgs.speculative_draft_load_format, + choices=LOAD_FORMAT_CHOICES, + help="The format of the draft model weights to load. " + "If not specified, will use the same format as --load-format. " + "Use 'dummy' to initialize draft model weights with random values for profiling.", + ) parser.add_argument( "--speculative-num-steps", type=int,