Format code & Improve readme (#52)

This commit is contained in:
Lianmin Zheng
2024-01-18 23:51:19 -08:00
committed by GitHub
parent 23471f9aa3
commit 199e82a15d
3 changed files with 25 additions and 12 deletions

View File

@@ -275,8 +275,7 @@ In above example, the server uses the chat template specified in the model token
You can override the chat template if needed when launching the server: You can override the chat template if needed when launching the server:
``` ```
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template llama-2
--chat-template llama-2
``` ```
If the chat template you are looking for is missing, you are welcome to contribute it. If the chat template you are looking for is missing, you are welcome to contribute it.
@@ -295,8 +294,7 @@ Meanwhile, you can also temporarily register your chat template as follows:
``` ```
``` ```
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template ./my_model_template.json
--chat-template ./my_model_template.json
``` ```
### Additional Arguments ### Additional Arguments

View File

@@ -1,10 +1,11 @@
# Adapted from # Adapted from
# https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py # https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
from sglang.srt.managers.openai_protocol import ChatCompletionRequest
from enum import IntEnum, auto
import dataclasses import dataclasses
from enum import IntEnum, auto
from typing import Dict, List, Tuple, Union from typing import Dict, List, Tuple, Union
from sglang.srt.managers.openai_protocol import ChatCompletionRequest
class SeparatorStyle(IntEnum): class SeparatorStyle(IntEnum):
"""Separator styles.""" """Separator styles."""
@@ -109,7 +110,11 @@ class Conversation:
ret = system_prompt ret = system_prompt
for i, (role, message) in enumerate(self.messages): for i, (role, message) in enumerate(self.messages):
if message: if message:
ret += role + ": " + message.replace("\r\n", "\n").replace("\n\n", "\n") ret += (
role
+ ": "
+ message.replace("\r\n", "\n").replace("\n\n", "\n")
)
ret += "\n\n" ret += "\n\n"
else: else:
ret += role + ":" ret += role + ":"
@@ -310,7 +315,9 @@ chat_templates: Dict[str, Conversation] = {}
def register_conv_template(template: Conversation, override: bool = False): def register_conv_template(template: Conversation, override: bool = False):
"""Register a new conversation template.""" """Register a new conversation template."""
if not override: if not override:
assert template.name not in chat_templates, f"{template.name} has been registered." assert (
template.name not in chat_templates
), f"{template.name} has been registered."
chat_templates[template.name] = template chat_templates[template.name] = template
@@ -319,7 +326,9 @@ def chat_template_exists(template_name: str) -> bool:
return template_name in chat_templates return template_name in chat_templates
def generate_chat_conv(request: ChatCompletionRequest, template_name: str) -> Conversation: def generate_chat_conv(
request: ChatCompletionRequest, template_name: str
) -> Conversation:
conv = chat_templates[template_name].copy() conv = chat_templates[template_name].copy()
conv = Conversation( conv = Conversation(
name=conv.name, name=conv.name,

View File

@@ -224,7 +224,9 @@ async def v1_chat_completions(raw_request: Request):
finish_reason=None, finish_reason=None,
) )
chunk = ChatCompletionStreamResponse( chunk = ChatCompletionStreamResponse(
id=content["meta_info"]["id"], choices=[choice_data], model=request.model id=content["meta_info"]["id"],
choices=[choice_data],
model=request.model,
) )
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
@@ -235,7 +237,9 @@ async def v1_chat_completions(raw_request: Request):
index=0, delta=DeltaMessage(content=delta), finish_reason=None index=0, delta=DeltaMessage(content=delta), finish_reason=None
) )
chunk = ChatCompletionStreamResponse( chunk = ChatCompletionStreamResponse(
id=content["meta_info"]["id"], choices=[choice_data], model=request.model id=content["meta_info"]["id"],
choices=[choice_data],
model=request.model,
) )
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n" yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n" yield "data: [DONE]\n\n"
@@ -293,7 +297,9 @@ def launch_server(server_args, pipe_finish_writer):
try: try:
sep_style = SeparatorStyle[template["sep_style"]] sep_style = SeparatorStyle[template["sep_style"]]
except KeyError: except KeyError:
raise ValueError(f"Unknown separator style: {template['sep_style']}") from None raise ValueError(
f"Unknown separator style: {template['sep_style']}"
) from None
register_conv_template( register_conv_template(
Conversation( Conversation(
name=template["name"], name=template["name"],