From 718c391fd7b2c22bb42afb2983d9e4e232f763d8 Mon Sep 17 00:00:00 2001 From: Wenxuan Tan Date: Wed, 5 Mar 2025 21:32:42 -0600 Subject: [PATCH] [Hotfix] Fix incomplete token_to_kv_pool refactor (#4121) --- python/sglang/srt/managers/scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 1ee04d3a7..da6c1f1ce 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -484,7 +484,7 @@ class Scheduler: logger.error( f"{self.cur_batch.batch_size()=}, " f"{self.cur_batch.reqs=}, " - f"{self.token_to_kv_pool.available_size()=}, " + f"{self.token_to_kv_pool_allocator.available_size()=}, " f"{self.tree_cache.evictable_size()=}, " ) # Wait for some time so that the parent process can print the error. @@ -932,7 +932,7 @@ class Scheduler: ): # During idle time, also collect metrics every 30 seconds. num_used = self.max_total_num_tokens - ( - self.token_to_kv_pool.available_size() + self.token_to_kv_pool_allocator.available_size() + self.tree_cache.evictable_size() ) num_running_reqs = len(self.running_batch.reqs) if self.running_batch else 0