[speculative decoding] rename lookahead to ngram (#11010)

Co-authored-by: a4zhangfei <a4zhangfei@qq.com>
This commit is contained in:
Zhihao Zhang
2025-09-29 12:06:59 +08:00
committed by GitHub
parent e05555fad8
commit 24f7cb1ece
22 changed files with 154 additions and 181 deletions

View File

@@ -388,10 +388,10 @@ class Scheduler(
target_worker=self.tp_worker,
dp_rank=dp_rank,
)
-elif self.spec_algorithm.is_lookahead():
-from sglang.srt.speculative.lookahead_worker import LOOKAHEADWorker
+elif self.spec_algorithm.is_ngram():
+from sglang.srt.speculative.ngram_worker import NGRAMWorker
-self.draft_worker = LOOKAHEADWorker(
+self.draft_worker = NGRAMWorker(
gpu_id=gpu_id,
tp_rank=tp_rank,
moe_ep_rank=moe_ep_rank,
@@ -826,7 +826,7 @@ class Scheduler(
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
draft_token_to_kv_pool=(
None
-if self.draft_worker is None or self.spec_algorithm.is_lookahead()
+if self.draft_worker is None or self.spec_algorithm.is_ngram()
else self.draft_worker.model_runner.token_to_kv_pool
),
req_to_metadata_buffer_idx_allocator=self.req_to_metadata_buffer_idx_allocator,
@@ -863,7 +863,7 @@ class Scheduler(
token_to_kv_pool=self.token_to_kv_pool_allocator.get_kvcache(),
draft_token_to_kv_pool=(
None
-if self.draft_worker is None or self.spec_algorithm.is_lookahead()
+if self.draft_worker is None or self.spec_algorithm.is_ngram()
else self.draft_worker.model_runner.token_to_kv_pool
),
req_to_metadata_buffer_idx_allocator=self.req_to_metadata_buffer_idx_allocator,