Eagle speculative decoding part 3: small modifications to the general scheduler (#2709)

Co-authored-by: kavioyu <kavioyu@tencent.com>
This commit is contained in:
Lianmin Zheng
2025-01-02 02:09:08 -08:00
committed by GitHub
parent 9183c23eca
commit ad20b7957e
13 changed files with 224 additions and 69 deletions

View File

@@ -63,6 +63,7 @@ from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.sampling.sampling_params import SamplingParams
from sglang.srt.server import _set_envs_and_config
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.utils import configure_logger, kill_process_tree, suppress_other_loggers
@@ -214,6 +215,7 @@ def extend(reqs, model_runner):
tree_cache=None,
model_config=model_runner.model_config,
enable_overlap=False,
spec_algorithm=SpeculativeAlgorithm.NONE,
)
batch.prepare_for_extend()
model_worker_batch = batch.get_model_worker_batch()