[Auto Sync] Update scheduler.py, server_args.py (20251014) (#11623)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
This commit is contained in:
@@ -658,6 +658,12 @@ class Scheduler(
|
|||||||
def launch_draft_worker(
|
def launch_draft_worker(
|
||||||
self, gpu_id, tp_rank, moe_ep_rank, server_args, port_args, dp_rank
|
self, gpu_id, tp_rank, moe_ep_rank, server_args, port_args, dp_rank
|
||||||
):
|
):
|
||||||
|
if server_args.speculative_draft_load_format is not None:
|
||||||
|
server_args.load_format = server_args.speculative_draft_load_format
|
||||||
|
logger.info(
|
||||||
|
f"Using draft model load_format: '{server_args.speculative_draft_load_format}'"
|
||||||
|
)
|
||||||
|
|
||||||
if self.spec_algorithm.is_eagle():
|
if self.spec_algorithm.is_eagle():
|
||||||
from sglang.srt.speculative.eagle_worker import EAGLEWorker
|
from sglang.srt.speculative.eagle_worker import EAGLEWorker
|
||||||
from sglang.srt.speculative.eagle_worker_v2 import EAGLEWorkerV2
|
from sglang.srt.speculative.eagle_worker_v2 import EAGLEWorkerV2
|
||||||
|
|||||||
@@ -325,6 +325,7 @@ class ServerArgs:
|
|||||||
speculative_algorithm: Optional[str] = None
|
speculative_algorithm: Optional[str] = None
|
||||||
speculative_draft_model_path: Optional[str] = None
|
speculative_draft_model_path: Optional[str] = None
|
||||||
speculative_draft_model_revision: Optional[str] = None
|
speculative_draft_model_revision: Optional[str] = None
|
||||||
|
speculative_draft_load_format: Optional[str] = None
|
||||||
speculative_num_steps: Optional[int] = None
|
speculative_num_steps: Optional[int] = None
|
||||||
speculative_eagle_topk: Optional[int] = None
|
speculative_eagle_topk: Optional[int] = None
|
||||||
speculative_num_draft_tokens: Optional[int] = None
|
speculative_num_draft_tokens: Optional[int] = None
|
||||||
@@ -2223,6 +2224,15 @@ class ServerArgs:
|
|||||||
"name, a tag name, or a commit id. If unspecified, will use "
|
"name, a tag name, or a commit id. If unspecified, will use "
|
||||||
"the default version.",
|
"the default version.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--speculative-draft-load-format",
|
||||||
|
type=str,
|
||||||
|
default=ServerArgs.speculative_draft_load_format,
|
||||||
|
choices=LOAD_FORMAT_CHOICES,
|
||||||
|
help="The format of the draft model weights to load. "
|
||||||
|
"If not specified, will use the same format as --load-format. "
|
||||||
|
"Use 'dummy' to initialize draft model weights with random values for profiling.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--speculative-num-steps",
|
"--speculative-num-steps",
|
||||||
type=int,
|
type=int,
|
||||||
|
|||||||
Reference in New Issue
Block a user