From f30abd090a1d02377a1211a8c8f5b10deac0e763 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Fri, 19 Jan 2024 17:03:33 -0800
Subject: [PATCH] Improve error message & Add vicuna template (#57)

---
 python/sglang/srt/conversation.py                 | 12 ++++++++++++
 python/sglang/srt/managers/router/model_runner.py |  5 +++++
 2 files changed, 17 insertions(+)

diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/conversation.py
index 92d999770..24c84a5c9 100644
--- a/python/sglang/srt/conversation.py
+++ b/python/sglang/srt/conversation.py
@@ -388,3 +388,15 @@ register_conv_template(
         stop_str=["<|endoftext|>", "<|im_end|>"],
     )
 )
+
+register_conv_template(
+    Conversation(
+        name="vicuna_v1.1",
+        system_message="A chat between a curious user and an artificial intelligence assistant. "
+        "The assistant gives helpful, detailed, and polite answers to the user's questions.",
+        roles=("USER", "ASSISTANT"),
+        sep_style=SeparatorStyle.ADD_COLON_TWO,
+        sep=" ",
+        sep2="</s>",
+    )
+)
diff --git a/python/sglang/srt/managers/router/model_runner.py b/python/sglang/srt/managers/router/model_runner.py
index 756cef685..bcdb3125b 100644
--- a/python/sglang/srt/managers/router/model_runner.py
+++ b/python/sglang/srt/managers/router/model_runner.py
@@ -297,6 +297,11 @@ class ModelRunner:
 
     def init_memory_pool(self, total_gpu_memory):
         self.max_total_num_token = self.profile_max_num_token(total_gpu_memory)
+
+        if self.max_total_num_token <= 0:
+            raise RuntimeError("Not enough memory. "
+                               "Please try to increase --mem-fraction-static.")
+
         self.req_to_token_pool = ReqToTokenPool(
             int(self.max_total_num_token / self.model_config.context_len * 256),
             self.model_config.context_len + 8,