Fix GPU OOM (#6564)

Co-authored-by: michael <michael.zhang@amd.com>
This commit is contained in:
kk
2025-05-25 07:38:39 +08:00
committed by GitHub
parent 24c035f2e3
commit 7a5e6ce1cb
8 changed files with 15 additions and 16 deletions

View File

@@ -412,6 +412,10 @@ class ModelRunner:
if not server_args.disable_chunked_prefix_cache:
logger.info("Chunked prefix cache is turned on.")
if server_args.attention_backend == "aiter":
if self.model_config.context_len > 8192:
self.mem_fraction_static *= 0.85
def init_torch_distributed(self):
logger.info("Init torch distributed begin.")