Format code & Improve readme (#52)
This commit is contained in:
@@ -275,8 +275,7 @@ In the above example, the server uses the chat template specified in the model token
|
|||||||
You can override the chat template if needed when launching the server:
|
You can override the chat template if needed when launching the server:
|
||||||
|
|
||||||
```
|
```
|
||||||
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template llama-2
|
||||||
--chat-template llama-2
|
|
||||||
```
|
```
|
||||||
|
|
||||||
If the chat template you are looking for is missing, you are welcome to contribute it.
|
If the chat template you are looking for is missing, you are welcome to contribute it.
|
||||||
@@ -295,8 +294,7 @@ Meanwhile, you can also temporarily register your chat template as follows:
|
|||||||
```
|
```
|
||||||
|
|
||||||
```
|
```
|
||||||
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template ./my_model_template.json
|
||||||
--chat-template ./my_model_template.json
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Additional Arguments
|
### Additional Arguments
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
# Adapted from
|
# Adapted from
|
||||||
# https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
# https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||||
from sglang.srt.managers.openai_protocol import ChatCompletionRequest
|
|
||||||
from enum import IntEnum, auto
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
from enum import IntEnum, auto
|
||||||
from typing import Dict, List, Tuple, Union
|
from typing import Dict, List, Tuple, Union
|
||||||
|
|
||||||
|
from sglang.srt.managers.openai_protocol import ChatCompletionRequest
|
||||||
|
|
||||||
|
|
||||||
class SeparatorStyle(IntEnum):
|
class SeparatorStyle(IntEnum):
|
||||||
"""Separator styles."""
|
"""Separator styles."""
|
||||||
@@ -109,7 +110,11 @@ class Conversation:
|
|||||||
ret = system_prompt
|
ret = system_prompt
|
||||||
for i, (role, message) in enumerate(self.messages):
|
for i, (role, message) in enumerate(self.messages):
|
||||||
if message:
|
if message:
|
||||||
ret += role + ": " + message.replace("\r\n", "\n").replace("\n\n", "\n")
|
ret += (
|
||||||
|
role
|
||||||
|
+ ": "
|
||||||
|
+ message.replace("\r\n", "\n").replace("\n\n", "\n")
|
||||||
|
)
|
||||||
ret += "\n\n"
|
ret += "\n\n"
|
||||||
else:
|
else:
|
||||||
ret += role + ":"
|
ret += role + ":"
|
||||||
@@ -310,7 +315,9 @@ chat_templates: Dict[str, Conversation] = {}
|
|||||||
def register_conv_template(template: Conversation, override: bool = False):
|
def register_conv_template(template: Conversation, override: bool = False):
|
||||||
"""Register a new conversation template."""
|
"""Register a new conversation template."""
|
||||||
if not override:
|
if not override:
|
||||||
assert template.name not in chat_templates, f"{template.name} has been registered."
|
assert (
|
||||||
|
template.name not in chat_templates
|
||||||
|
), f"{template.name} has been registered."
|
||||||
|
|
||||||
chat_templates[template.name] = template
|
chat_templates[template.name] = template
|
||||||
|
|
||||||
@@ -319,7 +326,9 @@ def chat_template_exists(template_name: str) -> bool:
|
|||||||
return template_name in chat_templates
|
return template_name in chat_templates
|
||||||
|
|
||||||
|
|
||||||
def generate_chat_conv(request: ChatCompletionRequest, template_name: str) -> Conversation:
|
def generate_chat_conv(
|
||||||
|
request: ChatCompletionRequest, template_name: str
|
||||||
|
) -> Conversation:
|
||||||
conv = chat_templates[template_name].copy()
|
conv = chat_templates[template_name].copy()
|
||||||
conv = Conversation(
|
conv = Conversation(
|
||||||
name=conv.name,
|
name=conv.name,
|
||||||
|
|||||||
@@ -224,7 +224,9 @@ async def v1_chat_completions(raw_request: Request):
|
|||||||
finish_reason=None,
|
finish_reason=None,
|
||||||
)
|
)
|
||||||
chunk = ChatCompletionStreamResponse(
|
chunk = ChatCompletionStreamResponse(
|
||||||
id=content["meta_info"]["id"], choices=[choice_data], model=request.model
|
id=content["meta_info"]["id"],
|
||||||
|
choices=[choice_data],
|
||||||
|
model=request.model,
|
||||||
)
|
)
|
||||||
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
|
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
|
||||||
|
|
||||||
@@ -235,7 +237,9 @@ async def v1_chat_completions(raw_request: Request):
|
|||||||
index=0, delta=DeltaMessage(content=delta), finish_reason=None
|
index=0, delta=DeltaMessage(content=delta), finish_reason=None
|
||||||
)
|
)
|
||||||
chunk = ChatCompletionStreamResponse(
|
chunk = ChatCompletionStreamResponse(
|
||||||
id=content["meta_info"]["id"], choices=[choice_data], model=request.model
|
id=content["meta_info"]["id"],
|
||||||
|
choices=[choice_data],
|
||||||
|
model=request.model,
|
||||||
)
|
)
|
||||||
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
|
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
|
||||||
yield "data: [DONE]\n\n"
|
yield "data: [DONE]\n\n"
|
||||||
@@ -293,7 +297,9 @@ def launch_server(server_args, pipe_finish_writer):
|
|||||||
try:
|
try:
|
||||||
sep_style = SeparatorStyle[template["sep_style"]]
|
sep_style = SeparatorStyle[template["sep_style"]]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise ValueError(f"Unknown separator style: {template['sep_style']}") from None
|
raise ValueError(
|
||||||
|
f"Unknown separator style: {template['sep_style']}"
|
||||||
|
) from None
|
||||||
register_conv_template(
|
register_conv_template(
|
||||||
Conversation(
|
Conversation(
|
||||||
name=template["name"],
|
name=template["name"],
|
||||||
|
|||||||
Reference in New Issue
Block a user