Tweak mem fraction (#20)
This commit is contained in:
@@ -278,7 +278,7 @@ class ModelRunner:
|
|||||||
load_format=self.load_format,
|
load_format=self.load_format,
|
||||||
revision=None,
|
revision=None,
|
||||||
)
|
)
|
||||||
self.model = model
|
self.model = model.eval()
|
||||||
|
|
||||||
def profile_max_num_token(self, total_gpu_memory):
|
def profile_max_num_token(self, total_gpu_memory):
|
||||||
available_gpu_memory = get_available_gpu_memory(
|
available_gpu_memory = get_available_gpu_memory(
|
||||||
|
|||||||
@@ -26,10 +26,14 @@ class ServerArgs:
|
|||||||
if self.tokenizer_path is None:
|
if self.tokenizer_path is None:
|
||||||
self.tokenizer_path = self.model_path
|
self.tokenizer_path = self.model_path
|
||||||
if self.mem_fraction_static is None:
|
if self.mem_fraction_static is None:
|
||||||
if self.tp_size > 1:
|
if self.tp_size >= 8:
|
||||||
self.mem_fraction_static = 0.8
|
self.mem_fraction_static = 0.80
|
||||||
|
elif self.tp_size >= 4:
|
||||||
|
self.mem_fraction_static = 0.82
|
||||||
|
elif self.tp_size >= 2:
|
||||||
|
self.mem_fraction_static = 0.85
|
||||||
else:
|
else:
|
||||||
self.mem_fraction_static = 0.9
|
self.mem_fraction_static = 0.90
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_cli_args(parser: argparse.ArgumentParser):
|
def add_cli_args(parser: argparse.ArgumentParser):
|
||||||
|
|||||||
Reference in New Issue
Block a user