diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index cec2262c4..afbc98b7c 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -158,7 +158,6 @@ class ImageInputs: im_end_id: Optional[torch.Tensor] = None slice_start_id: Optional[torch.Tensor] = None slice_end_id: Optional[torch.Tensor] = None - tgt_sizes: Optional[list] = None @staticmethod diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py index ab27e81b7..e30736722 100644 --- a/python/sglang/srt/mem_cache/memory_pool.py +++ b/python/sglang/srt/mem_cache/memory_pool.py @@ -49,7 +49,6 @@ class ReqToTokenPool: size: int, max_context_len: int, device: str, - use_records: bool, enable_memory_saver: bool, ): memory_saver_adapter = TorchMemorySaverAdapter.create( @@ -64,17 +63,9 @@ class ReqToTokenPool: (size, max_context_len), dtype=torch.int32, device=device ) self.free_slots = list(range(size)) - self.write_records = [] - self.use_records = use_records - - if self.use_records: - self.write = self.write_with_records - else: - self.write = self.write_without_records def write(self, indices, values): - # Keep the signature for type checking. It will be assigned during runtime. - raise NotImplementedError() + self.req_to_token[indices] = values def available_size(self): return len(self.free_slots) @@ -96,23 +87,6 @@ class ReqToTokenPool: def clear(self): self.free_slots = list(range(self.size)) - self.write_records = [] - - def write_without_records(self, indices, values): - self.req_to_token[indices] = values - - def write_with_records(self, indices, values): - self.req_to_token[indices] = values - self.write_records.append((indices, values)) - - def get_write_records(self): - ret = self.write_records - self.write_records = [] - return ret - - def apply_write_records(self, write_records: List[Tuple]): - for indices, values in write_records: - self.req_to_token[indices] = values class BaseTokenToKVPool: diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index bca4711eb..46920d922 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -617,7 +617,6 @@ class ModelRunner: size=max_num_reqs + 1, max_context_len=self.model_config.context_len + 4, device=self.device, - use_records=False, enable_memory_saver=self.server_args.enable_memory_saver, ) if (