From 7906d1d29863bc3b33c4bcfb942a5d61f9867127 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 18 Jan 2025 20:20:23 -0800 Subject: [PATCH] Remove the unused write_with_records (#2972) --- python/sglang/srt/managers/schedule_batch.py | 1 - python/sglang/srt/mem_cache/memory_pool.py | 28 +------------------ .../sglang/srt/model_executor/model_runner.py | 1 - 3 files changed, 1 insertion(+), 29 deletions(-) diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index cec2262c4..afbc98b7c 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -158,7 +158,6 @@ class ImageInputs: im_end_id: Optional[torch.Tensor] = None slice_start_id: Optional[torch.Tensor] = None slice_end_id: Optional[torch.Tensor] = None - tgt_sizes: Optional[list] = None @staticmethod diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py index ab27e81b7..e30736722 100644 --- a/python/sglang/srt/mem_cache/memory_pool.py +++ b/python/sglang/srt/mem_cache/memory_pool.py @@ -49,7 +49,6 @@ class ReqToTokenPool: size: int, max_context_len: int, device: str, - use_records: bool, enable_memory_saver: bool, ): memory_saver_adapter = TorchMemorySaverAdapter.create( @@ -64,17 +63,9 @@ class ReqToTokenPool: (size, max_context_len), dtype=torch.int32, device=device ) self.free_slots = list(range(size)) - self.write_records = [] - self.use_records = use_records - - if self.use_records: - self.write = self.write_with_records - else: - self.write = self.write_without_records def write(self, indices, values): - # Keep the signature for type checking. It will be assigned during runtime. - raise NotImplementedError() + self.req_to_token[indices] = values def available_size(self): return len(self.free_slots) @@ -96,23 +87,6 @@ class ReqToTokenPool: def clear(self): self.free_slots = list(range(self.size)) - self.write_records = [] - - def write_without_records(self, indices, values): - self.req_to_token[indices] = values - - def write_with_records(self, indices, values): - self.req_to_token[indices] = values - self.write_records.append((indices, values)) - - def get_write_records(self): - ret = self.write_records - self.write_records = [] - return ret - - def apply_write_records(self, write_records: List[Tuple]): - for indices, values in write_records: - self.req_to_token[indices] = values class BaseTokenToKVPool: diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index bca4711eb..46920d922 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -617,7 +617,6 @@ class ModelRunner: size=max_num_reqs + 1, max_context_len=self.model_config.context_len + 4, device=self.device, - use_records=False, enable_memory_saver=self.server_args.enable_memory_saver, ) if (