[minor] simplify the TokenToKVPoolAllocator (#7414)

This commit is contained in:
Liangsheng Yin
2025-06-22 12:37:18 +08:00
committed by GitHub
parent b7a2df0a44
commit 05c9bc8956
14 changed files with 165 additions and 149 deletions

View File

@@ -21,13 +21,11 @@ Life cycle of a request in the decode server
from __future__ import annotations
import logging
import os
from collections import deque
from dataclasses import dataclass
from http import HTTPStatus
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
import numpy as np
import torch
from torch.distributed import ProcessGroup
@@ -47,12 +45,9 @@ from sglang.srt.disaggregation.utils import (
prepare_abort,
)
from sglang.srt.managers.schedule_batch import FINISH_ABORT, ScheduleBatch
+from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
-from sglang.srt.mem_cache.memory_pool import (
-KVCache,
-ReqToTokenPool,
-TokenToKVPoolAllocator,
-)
+from sglang.srt.mem_cache.memory_pool import KVCache, ReqToTokenPool
from sglang.srt.model_executor.forward_batch_info import ForwardMode
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
from sglang.srt.utils import require_mlp_sync
@@ -141,7 +136,7 @@ class DecodePreallocQueue:
def __init__(
self,
req_to_token_pool: ReqToTokenPool,
-token_to_kv_pool_allocator: TokenToKVPoolAllocator,
+token_to_kv_pool_allocator: BaseTokenToKVPoolAllocator,
draft_token_to_kv_pool: Optional[KVCache],
req_to_metadata_buffer_idx_allocator: ReqToMetadataIdxAllocator,
metadata_buffers: MetadataBuffers,

View File

@@ -25,7 +25,6 @@ from collections import deque
from http import HTTPStatus
from typing import TYPE_CHECKING, List, Optional
import numpy as np
import torch
from sglang.srt.disaggregation.base import BaseKVManager, KVPoll