refactor EAGLE 2 (#3269)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: merrymercy <lianminzheng@gmail.com>
Co-authored-by: Ying1123 <sqy1415@gmail.com>
This commit is contained in:
Yineng Zhang
2025-02-03 20:52:30 +08:00
committed by GitHub
parent 3c8ac78dc1
commit 013021b6a1
9 changed files with 1271 additions and 687 deletions

View File

@@ -52,6 +52,7 @@ from sglang.srt.mem_cache.memory_pool import (
MLATokenToKVPool,
ReqToTokenPool,
)
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader import get_model
from sglang.srt.server_args import ServerArgs
@@ -714,8 +715,6 @@ class ModelRunner:
def init_cuda_graphs(self):
"""Capture cuda graphs."""
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
self.cuda_graph_runner = None
if not self.is_generation: