From 3f5ac88d029964f756270c25a9f677f60adb28e7 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Wed, 23 Oct 2024 15:20:39 -0700 Subject: [PATCH] Fix out of memory message. (#1771) --- python/sglang/srt/managers/schedule_batch.py | 7 ++++++- python/sglang/srt/managers/scheduler.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index bcf3103ad..fac008d3f 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -514,7 +514,12 @@ class ScheduleBatch: out_cache_loc = self.token_to_kv_pool.alloc(num_tokens) if out_cache_loc is None: - logger.error("Prefill out of memory. Try to lower your batch size.") + phase_str = "Prefill" if self.forward_mode.is_extend() else "Decode" + logger.error( + f"{phase_str} out of memory. Try to lower your batch size.\n" + f"Try to allocate {num_tokens} tokens.\n" + f"Avaliable tokens: {self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()}\n" + ) if self.tree_cache is not None: self.tree_cache.pretty_print() exit(1) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 60531ce25..e9bf7be8e 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -71,7 +71,6 @@ from sglang.srt.utils import ( is_generation_model, is_multimodal_model, kill_parent_process, - pytorch_profile, set_random_seed, suppress_other_loggers, )