[Feature] Speculative decoding support lookahead (#9873)

Co-authored-by: a4zhangfei <a4zhangfei@qq.com> Co-authored-by: Qiaolin-Yu <liin1211@outlook.com>
2025-09-19 07:42:41 +08:00
parent 2a2ff9a840
commit e7bc600304
30 changed files with 2058 additions and 32 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -1402,7 +1402,7 @@ class ModelRunner:
        if self.is_hybrid_gdn:
            max_num_reqs = min(max_num_reqs, self.server_args.max_mamba_cache_size)

-        if not self.spec_algorithm.is_none():
+        if self.spec_algorithm.is_eagle() or self.spec_algorithm.is_standalone():
            if self.is_draft_worker:
                self.max_total_num_tokens = self.server_args.draft_runner_cache_size
                max_num_reqs = self.server_args.max_num_reqs