Tweak mem fraction (#20)

This commit is contained in:
Lianmin Zheng
2024-01-17 04:43:17 -08:00
committed by GitHub
parent bf51ddc6e5
commit f9d723816a
2 changed files with 8 additions and 4 deletions

View File

@@ -278,7 +278,7 @@ class ModelRunner:
load_format=self.load_format, load_format=self.load_format,
revision=None, revision=None,
) )
self.model = model self.model = model.eval()
def profile_max_num_token(self, total_gpu_memory): def profile_max_num_token(self, total_gpu_memory):
available_gpu_memory = get_available_gpu_memory( available_gpu_memory = get_available_gpu_memory(

View File

@@ -26,10 +26,14 @@ class ServerArgs:
if self.tokenizer_path is None: if self.tokenizer_path is None:
self.tokenizer_path = self.model_path self.tokenizer_path = self.model_path
if self.mem_fraction_static is None: if self.mem_fraction_static is None:
if self.tp_size > 1: if self.tp_size >= 8:
self.mem_fraction_static = 0.8 self.mem_fraction_static = 0.80
elif self.tp_size >= 4:
self.mem_fraction_static = 0.82
elif self.tp_size >= 2:
self.mem_fraction_static = 0.85
else: else:
self.mem_fraction_static = 0.9 self.mem_fraction_static = 0.90
@staticmethod @staticmethod
def add_cli_args(parser: argparse.ArgumentParser): def add_cli_args(parser: argparse.ArgumentParser):