[minor] simplify the TokenToKVPoolAllocator (#7414)
This commit is contained in:
@@ -71,14 +71,17 @@ from sglang.srt.managers.schedule_batch import (
|
||||
GLOBAL_SERVER_ARGS_KEYS,
|
||||
global_server_args_dict,
|
||||
)
|
||||
from sglang.srt.mem_cache.allocator import (
|
||||
BaseTokenToKVPoolAllocator,
|
||||
PagedTokenToKVPoolAllocator,
|
||||
TokenToKVPoolAllocator,
|
||||
)
|
||||
from sglang.srt.mem_cache.memory_pool import (
|
||||
DoubleSparseTokenToKVPool,
|
||||
MHATokenToKVPool,
|
||||
MLATokenToKVPool,
|
||||
ReqToTokenPool,
|
||||
TokenToKVPoolAllocator,
|
||||
)
|
||||
from sglang.srt.mem_cache.paged_allocator import PagedTokenToKVPoolAllocator
|
||||
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
||||
from sglang.srt.model_executor.expert_location_updater import ExpertLocationUpdater
|
||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
|
||||
@@ -152,7 +155,7 @@ class ModelRunner:
|
||||
server_args: ServerArgs,
|
||||
is_draft_worker: bool = False,
|
||||
req_to_token_pool: Optional[ReqToTokenPool] = None,
|
||||
token_to_kv_pool_allocator: Optional[TokenToKVPoolAllocator] = None,
|
||||
token_to_kv_pool_allocator: Optional[BaseTokenToKVPoolAllocator] = None,
|
||||
):
|
||||
# Parse args
|
||||
self.model_config = model_config
|
||||
|
||||
Reference in New Issue
Block a user