Tweak mem fraction (#20)

This commit is contained in:
Lianmin Zheng
2024-01-17 04:43:17 -08:00
committed by GitHub
parent bf51ddc6e5
commit f9d723816a
2 changed files with 8 additions and 4 deletions

View File

@@ -278,7 +278,7 @@ class ModelRunner:
load_format=self.load_format,
revision=None,
)
self.model = model
self.model = model.eval()
def profile_max_num_token(self, total_gpu_memory):
available_gpu_memory = get_available_gpu_memory(

View File

@@ -26,10 +26,14 @@ class ServerArgs:
if self.tokenizer_path is None:
self.tokenizer_path = self.model_path
if self.mem_fraction_static is None:
if self.tp_size > 1:
self.mem_fraction_static = 0.8
if self.tp_size >= 8:
self.mem_fraction_static = 0.80
elif self.tp_size >= 4:
self.mem_fraction_static = 0.82
elif self.tp_size >= 2:
self.mem_fraction_static = 0.85
else:
self.mem_fraction_static = 0.9
self.mem_fraction_static = 0.90
@staticmethod
def add_cli_args(parser: argparse.ArgumentParser):