From f9d723816ab762c20279463797f3b1a95158f23b Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 17 Jan 2024 04:43:17 -0800 Subject: [PATCH] Tweak mem fraction (#20) --- python/sglang/srt/managers/router/model_runner.py | 2 +- python/sglang/srt/server_args.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/sglang/srt/managers/router/model_runner.py b/python/sglang/srt/managers/router/model_runner.py index 2a42eb362..756cef685 100644 --- a/python/sglang/srt/managers/router/model_runner.py +++ b/python/sglang/srt/managers/router/model_runner.py @@ -278,7 +278,7 @@ class ModelRunner: load_format=self.load_format, revision=None, ) - self.model = model + self.model = model.eval() def profile_max_num_token(self, total_gpu_memory): available_gpu_memory = get_available_gpu_memory( diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 7c2957abc..e8e7d6f8c 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -26,10 +26,14 @@ class ServerArgs: if self.tokenizer_path is None: self.tokenizer_path = self.model_path if self.mem_fraction_static is None: - if self.tp_size > 1: - self.mem_fraction_static = 0.8 + if self.tp_size >= 8: + self.mem_fraction_static = 0.80 + elif self.tp_size >= 4: + self.mem_fraction_static = 0.82 + elif self.tp_size >= 2: + self.mem_fraction_static = 0.85 else: - self.mem_fraction_static = 0.9 + self.mem_fraction_static = 0.90 @staticmethod def add_cli_args(parser: argparse.ArgumentParser):