Fix out-of-memory message. (#1771)
This commit is contained in:
@@ -514,7 +514,12 @@ class ScheduleBatch:
|
|||||||
out_cache_loc = self.token_to_kv_pool.alloc(num_tokens)
|
out_cache_loc = self.token_to_kv_pool.alloc(num_tokens)
|
||||||
|
|
||||||
if out_cache_loc is None:
|
if out_cache_loc is None:
|
||||||
logger.error("Prefill out of memory. Try to lower your batch size.")
|
phase_str = "Prefill" if self.forward_mode.is_extend() else "Decode"
|
||||||
|
logger.error(
|
||||||
|
f"{phase_str} out of memory. Try to lower your batch size.\n"
|
||||||
|
f"Try to allocate {num_tokens} tokens.\n"
|
||||||
|
f"Avaliable tokens: {self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()}\n"
|
||||||
|
)
|
||||||
if self.tree_cache is not None:
|
if self.tree_cache is not None:
|
||||||
self.tree_cache.pretty_print()
|
self.tree_cache.pretty_print()
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|||||||
@@ -71,7 +71,6 @@ from sglang.srt.utils import (
|
|||||||
is_generation_model,
|
is_generation_model,
|
||||||
is_multimodal_model,
|
is_multimodal_model,
|
||||||
kill_parent_process,
|
kill_parent_process,
|
||||||
pytorch_profile,
|
|
||||||
set_random_seed,
|
set_random_seed,
|
||||||
suppress_other_loggers,
|
suppress_other_loggers,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user