[Auto Sync] Update scheduler.py, server_args.py (20251014) (#11623)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
This commit is contained in:
@@ -658,6 +658,12 @@ class Scheduler(
|
||||
def launch_draft_worker(
|
||||
self, gpu_id, tp_rank, moe_ep_rank, server_args, port_args, dp_rank
|
||||
):
|
||||
if server_args.speculative_draft_load_format is not None:
|
||||
server_args.load_format = server_args.speculative_draft_load_format
|
||||
logger.info(
|
||||
f"Using draft model load_format: '{server_args.speculative_draft_load_format}'"
|
||||
)
|
||||
|
||||
if self.spec_algorithm.is_eagle():
|
||||
from sglang.srt.speculative.eagle_worker import EAGLEWorker
|
||||
from sglang.srt.speculative.eagle_worker_v2 import EAGLEWorkerV2
|
||||
|
||||
@@ -325,6 +325,7 @@ class ServerArgs:
|
||||
speculative_algorithm: Optional[str] = None
|
||||
speculative_draft_model_path: Optional[str] = None
|
||||
speculative_draft_model_revision: Optional[str] = None
|
||||
speculative_draft_load_format: Optional[str] = None
|
||||
speculative_num_steps: Optional[int] = None
|
||||
speculative_eagle_topk: Optional[int] = None
|
||||
speculative_num_draft_tokens: Optional[int] = None
|
||||
@@ -2223,6 +2224,15 @@ class ServerArgs:
|
||||
"name, a tag name, or a commit id. If unspecified, will use "
|
||||
"the default version.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--speculative-draft-load-format",
|
||||
type=str,
|
||||
default=ServerArgs.speculative_draft_load_format,
|
||||
choices=LOAD_FORMAT_CHOICES,
|
||||
help="The format of the draft model weights to load. "
|
||||
"If not specified, will use the same format as --load-format. "
|
||||
"Use 'dummy' to initialize draft model weights with random values for profiling.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--speculative-num-steps",
|
||||
type=int,
|
||||
|
||||
Reference in New Issue
Block a user