Move mem_fraction_static adjustment for multimodal models to server_args.py & Fix session control & Other cleanups (#7748)

This commit is contained in:
Lianmin Zheng
2025-07-04 16:33:33 -07:00
committed by GitHub
parent 975a5ec69c
commit 14229ccf8f
16 changed files with 339 additions and 137 deletions

View File

@@ -24,6 +24,9 @@ class MultiModalCache:
self.current_size += data_size
return True
def has(self, mm_hash: int) -> bool:
    """Report whether ``self.mm_cache`` holds an entry keyed by ``mm_hash``.

    Args:
        mm_hash: Key to look for in the cache.

    Returns:
        The result of the ``in`` membership test against ``self.mm_cache``.
    """
    cached_entries = self.mm_cache
    return mm_hash in cached_entries
def get(self, mm_hash: int) -> torch.Tensor:
    """Fetch the entry stored under ``mm_hash`` from ``self.mm_cache``.

    Args:
        mm_hash: Key of the cached entry to fetch.

    Returns:
        Whatever ``self.mm_cache.get(mm_hash)`` yields.
    """
    # NOTE(review): if mm_cache is a plain dict, a missing key yields None
    # rather than a tensor -- confirm callers guard with has() or handle None.
    lookup = self.mm_cache.get
    return lookup(mm_hash)