Move mem_fraction_static adjustment for multimodal models to server_args.py & Fix session control & Other cleanups (#7748)

This commit is contained in:
Lianmin Zheng
2025-07-04 16:33:33 -07:00
committed by GitHub
parent 975a5ec69c
commit 14229ccf8f
16 changed files with 339 additions and 137 deletions

View File

@@ -319,6 +319,14 @@ class ServerArgs:
else:
self.mem_fraction_static = 0.88
# Lazy import to avoid a circular import
from sglang.srt.configs.model_config import ModelConfig
# Multimodal models need more memory for the image processor
model_config = ModelConfig.from_server_args(self)
if model_config.is_multimodal:
self.mem_fraction_static *= 0.90
# Set chunked prefill size, which depends on the GPU memory capacity
if self.chunked_prefill_size is None:
if gpu_mem is not None: