Move mem_fraction_static adjustment for multimodal models to server_args.py & Fix session control & Other cleanups (#7748)
This commit is contained in:
@@ -319,6 +319,14 @@ class ServerArgs:
|
||||
else:
|
||||
self.mem_fraction_static = 0.88
|
||||
|
||||
# Lazy import to avoid a circular import
|
||||
from sglang.srt.configs.model_config import ModelConfig
|
||||
|
||||
# Multimodal models need more memory for the image processor
|
||||
model_config = ModelConfig.from_server_args(self)
|
||||
if model_config.is_multimodal:
|
||||
self.mem_fraction_static *= 0.90
|
||||
|
||||
# Set chunked prefill size, which depends on the GPU memory capacity
|
||||
if self.chunked_prefill_size is None:
|
||||
if gpu_mem is not None:
|
||||
|
||||
Reference in New Issue
Block a user