Standalone speculative decoding (#10090)
This commit is contained in:
@@ -349,6 +349,18 @@ class Scheduler(
|
||||
target_worker=self.tp_worker,
|
||||
dp_rank=dp_rank,
|
||||
)
|
||||
elif self.spec_algorithm.is_standalone():
|
||||
from sglang.srt.speculative.standalone_worker import StandaloneWorker
|
||||
|
||||
self.draft_worker = StandaloneWorker(
|
||||
gpu_id=gpu_id,
|
||||
tp_rank=tp_rank,
|
||||
moe_ep_rank=moe_ep_rank,
|
||||
server_args=server_args,
|
||||
nccl_port=port_args.nccl_port,
|
||||
target_worker=self.tp_worker,
|
||||
dp_rank=dp_rank,
|
||||
)
|
||||
else:
|
||||
self.draft_worker = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user