diff --git a/README.md b/README.md index 3b3fceb36..bceec868b 100644 --- a/README.md +++ b/README.md @@ -275,8 +275,7 @@ In above example, the server uses the chat template specified in the model token You can override the chat template if needed when launching the server: ``` -python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 ---chat-template llama-2 +python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template llama-2 ``` If the chat template you are looking for is missing, you are welcome to contribute it. @@ -295,8 +294,7 @@ Meanwhile, you can also temporary register your chat template as follows: ``` ``` -python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 ---chat-template ./my_model_template.json +python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template ./my_model_template.json ``` ### Additional Arguments diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/conversation.py index 41d153fd4..92d999770 100644 --- a/python/sglang/srt/conversation.py +++ b/python/sglang/srt/conversation.py @@ -1,10 +1,11 @@ # Adapted from # https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py -from sglang.srt.managers.openai_protocol import ChatCompletionRequest -from enum import IntEnum, auto import dataclasses +from enum import IntEnum, auto from typing import Dict, List, Tuple, Union +from sglang.srt.managers.openai_protocol import ChatCompletionRequest + class SeparatorStyle(IntEnum): """Separator styles.""" @@ -109,7 +110,11 @@ class Conversation: ret = system_prompt for i, (role, message) in enumerate(self.messages): if message: - ret += role + ": " + message.replace("\r\n", "\n").replace("\n\n", "\n") + ret += ( + role + + ": " + + message.replace("\r\n", "\n").replace("\n\n", "\n") + ) ret += "\n\n" else: ret += role + ":" @@ -310,7 +315,9 @@ chat_templates: Dict[str, Conversation] = {} def register_conv_template(template: Conversation, override: bool = False): """Register a new conversation template.""" if not override: - assert template.name not in chat_templates, f"{template.name} has been registered." + assert ( + template.name not in chat_templates + ), f"{template.name} has been registered." chat_templates[template.name] = template @@ -319,7 +326,9 @@ def chat_template_exists(template_name: str) -> bool: return template_name in chat_templates -def generate_chat_conv(request: ChatCompletionRequest, template_name: str) -> Conversation: +def generate_chat_conv( + request: ChatCompletionRequest, template_name: str +) -> Conversation: conv = chat_templates[template_name].copy() conv = Conversation( name=conv.name, diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 320c0e86a..c5bbe0674 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -224,7 +224,9 @@ async def v1_chat_completions(raw_request: Request): finish_reason=None, ) chunk = ChatCompletionStreamResponse( - id=content["meta_info"]["id"], choices=[choice_data], model=request.model + id=content["meta_info"]["id"], + choices=[choice_data], + model=request.model, ) yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" @@ -235,7 +237,9 @@ async def v1_chat_completions(raw_request: Request): index=0, delta=DeltaMessage(content=delta), finish_reason=None ) chunk = ChatCompletionStreamResponse( - id=content["meta_info"]["id"], choices=[choice_data], model=request.model + id=content["meta_info"]["id"], + choices=[choice_data], + model=request.model, ) yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" yield "data: [DONE]\n\n" @@ -293,7 +297,9 @@ def launch_server(server_args, pipe_finish_writer): try: sep_style = SeparatorStyle[template["sep_style"]] except KeyError: - raise ValueError(f"Unknown separator style: {template['sep_style']}") from None + raise ValueError( + f"Unknown separator style: {template['sep_style']}" + ) from None register_conv_template( Conversation( name=template["name"],