Improve error message & Add vicuna template (#57)
This commit is contained in:
@@ -388,3 +388,15 @@ register_conv_template(
|
||||
stop_str=["<|endoftext|>", "<|im_end|>"],
|
||||
)
|
||||
)
|
||||
|
||||
# Register the Vicuna v1.1 chat template: plain "USER"/"ASSISTANT" role tags,
# joined with the ADD_COLON_TWO separator style — a single space between
# turns and "</s>" terminating each assistant reply.
register_conv_template(
    Conversation(
        name="vicuna_v1.1",
        system_message="A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions.",
        roles=("USER", "ASSISTANT"),
        sep_style=SeparatorStyle.ADD_COLON_TWO,
        sep=" ",
        sep2="</s>",
    )
)
|
||||
|
||||
@@ -297,6 +297,11 @@ class ModelRunner:
|
||||
|
||||
def init_memory_pool(self, total_gpu_memory):
|
||||
self.max_total_num_token = self.profile_max_num_token(total_gpu_memory)
|
||||
|
||||
if self.max_total_num_token <= 0:
|
||||
raise RuntimeError("Not enought memory. "
|
||||
"Please try to increase --mem-fraction-static.")
|
||||
|
||||
self.req_to_token_pool = ReqToTokenPool(
|
||||
int(self.max_total_num_token / self.model_config.context_len * 256),
|
||||
self.model_config.context_len + 8,
|
||||
|
||||
Reference in New Issue
Block a user