[Feature] Speculative decoding support lookahead (#9873)
Co-authored-by: a4zhangfei <a4zhangfei@qq.com> Co-authored-by: Qiaolin-Yu <liin1211@outlook.com>
This commit is contained in:
@@ -1402,7 +1402,7 @@ class ModelRunner:
|
||||
if self.is_hybrid_gdn:
|
||||
max_num_reqs = min(max_num_reqs, self.server_args.max_mamba_cache_size)
|
||||
|
||||
if not self.spec_algorithm.is_none():
|
||||
if self.spec_algorithm.is_eagle() or self.spec_algorithm.is_standalone():
|
||||
if self.is_draft_worker:
|
||||
self.max_total_num_tokens = self.server_args.draft_runner_cache_size
|
||||
max_num_reqs = self.server_args.max_num_reqs
|
||||
|
||||
Reference in New Issue
Block a user