diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 287efd1de..8bfb077f9 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -849,7 +849,7 @@ class CudaGraphRunner: ) elif self.model_runner.spec_algorithm.is_ngram(): - from sglang.srt.speculative.ngram_utils import NgramVerifyInput + from sglang.srt.speculative.ngram_info import NgramVerifyInput spec_info = NgramVerifyInput( draft_token=None, diff --git a/python/sglang/srt/speculative/ngram_utils.py b/python/sglang/srt/speculative/ngram_info.py similarity index 100% rename from python/sglang/srt/speculative/ngram_utils.py rename to python/sglang/srt/speculative/ngram_info.py diff --git a/python/sglang/srt/speculative/ngram_worker.py b/python/sglang/srt/speculative/ngram_worker.py index d2197023d..e1676ad1e 100644 --- a/python/sglang/srt/speculative/ngram_worker.py +++ b/python/sglang/srt/speculative/ngram_worker.py @@ -11,7 +11,7 @@ from sglang.srt.managers.tp_worker import TpModelWorker from sglang.srt.model_executor.forward_batch_info import ForwardMode from sglang.srt.server_args import ServerArgs from sglang.srt.speculative.cpp_ngram.ngram_cache import NgramCache -from sglang.srt.speculative.ngram_utils import NgramVerifyInput +from sglang.srt.speculative.ngram_info import NgramVerifyInput from sglang.srt.speculative.spec_info import SpeculativeAlgorithm logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/speculative/spec_utils.py b/python/sglang/srt/speculative/spec_utils.py index 714da8573..8478ac14c 100644 --- a/python/sglang/srt/speculative/spec_utils.py +++ b/python/sglang/srt/speculative/spec_utils.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import os import time from typing import TYPE_CHECKING, List