Adjust default mem fraction to avoid OOM (#823)

This commit is contained in:
Ying Sheng
2024-07-30 01:58:31 -07:00
committed by GitHub
parent ae5c0fc442
commit e7487b08bc
4 changed files with 22 additions and 17 deletions

View File

@@ -91,15 +91,15 @@ class ServerArgs:
self.tokenizer_path = self.model_path
if self.mem_fraction_static is None:
if self.tp_size >= 16:
self.mem_fraction_static = 0.80
self.mem_fraction_static = 0.79
elif self.tp_size >= 8:
self.mem_fraction_static = 0.84
self.mem_fraction_static = 0.83
elif self.tp_size >= 4:
self.mem_fraction_static = 0.86
self.mem_fraction_static = 0.85
elif self.tp_size >= 2:
self.mem_fraction_static = 0.88
self.mem_fraction_static = 0.87
else:
self.mem_fraction_static = 0.89
self.mem_fraction_static = 0.88
if isinstance(self.additional_ports, int):
self.additional_ports = [self.additional_ports]
elif self.additional_ports is None: