Standalone speculative decoding (#10090)

This commit is contained in:
Qiaolin Yu
2025-09-07 20:55:09 -07:00
committed by GitHub
parent 400d3b97ae
commit 8cda5a622c
11 changed files with 285 additions and 9 deletions

View File

@@ -349,6 +349,18 @@ class Scheduler(
target_worker=self.tp_worker,
dp_rank=dp_rank,
)
elif self.spec_algorithm.is_standalone():
from sglang.srt.speculative.standalone_worker import StandaloneWorker
self.draft_worker = StandaloneWorker(
gpu_id=gpu_id,
tp_rank=tp_rank,
moe_ep_rank=moe_ep_rank,
server_args=server_args,
nccl_port=port_args.nccl_port,
target_worker=self.tp_worker,
dp_rank=dp_rank,
)
else:
self.draft_worker = None