[API, Feature] Support response prefill for openai API (#1490)

This commit is contained in:
Ying Sheng
2024-09-22 06:46:17 -07:00
committed by GitHub
parent 39bb49d156
commit e4780cf839
4 changed files with 74 additions and 2 deletions

View File

@@ -0,0 +1,34 @@
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000
python openai_chat.py
"""
import openai
from openai import OpenAI
# Talk to the locally launched server through its OpenAI-compatible endpoint
# (see the usage note at the top of this file for the launch command).
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")

# The task given to the model: pull structured fields out of free-form text.
user_prompt = """
Extract the name, size, price, and color from this product description as a JSON object:
<description>
The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices.
</description>
"""

# Response prefill: the conversation deliberately ends with a partial
# assistant turn ("{\n") so that the reply is steered towards a JSON object.
chat_messages = [
    {"role": "system", "content": "You are a helpful AI assistant"},
    {"role": "user", "content": user_prompt},
    {"role": "assistant", "content": "{\n"},
]

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    messages=chat_messages,
    temperature=0,
)

# Show the text of the first (and only requested) completion choice.
first_choice = response.choices[0]
print(first_choice.message.content)

View File

@@ -858,11 +858,18 @@ def v1_chat_generate_request(
openai_compatible_messages.append(
{"role": message.role, "content": content["text"]}
)
if openai_compatible_messages[-1]["role"] == "assistant":
assistant_prefix = openai_compatible_messages[-1]["content"]
openai_compatible_messages = openai_compatible_messages[:-1]
else:
assistant_prefix = None
prompt_ids = tokenizer_manager.tokenizer.apply_chat_template(
openai_compatible_messages,
tokenize=True,
add_generation_prompt=True,
)
if assistant_prefix:
prompt_ids += tokenizer_manager.tokenizer.encode(assistant_prefix)
stop = request.stop
image_data = None
modalities = []

View File

@@ -27,7 +27,7 @@ class TestOpenAIServer(unittest.TestCase):
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=("--max-total-token", "1024"),
other_args=("--max-total-token", "1024", "--context-len", "8192"),
env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ},
return_stdout_stderr=True,
)

View File

@@ -445,7 +445,7 @@ class TestOpenAIServer(unittest.TestCase):
for mode in ["completion", "chat"]:
self.run_batch(mode)
def test_calcel_batch(self):
def test_cancel_batch(self):
for mode in ["completion", "chat"]:
self.run_cancel_batch(mode)
@@ -495,6 +495,37 @@ class TestOpenAIServer(unittest.TestCase):
text = response.choices[0].message.content
assert isinstance(text, str)
def test_response_prefill(self):
    """Verify that a trailing assistant message acts as a response prefill.

    The conversation ends with a partial assistant turn ("{\\n"). The test
    passes when the returned content, once surrounding whitespace is
    stripped, begins with the first JSON field rather than with the
    prefilled opening brace.
    """
    client = openai.Client(api_key=self.api_key, base_url=self.base_url)

    user_prompt = """
Extract the name, size, price, and color from this product description as a JSON object:
<description>
The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices.
</description>
"""
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": user_prompt},
        # Partial assistant reply that the model is expected to continue.
        {"role": "assistant", "content": "{\n"},
    ]

    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
        messages=conversation,
        temperature=0,
    )

    # temperature=0 is requested so the expected prefix can be matched exactly.
    completion_text = response.choices[0].message.content
    expected_prefix = '"name": "SmartHome Mini",'
    assert completion_text.strip().startswith(expected_prefix)
# Run the unittest test runner when this file is executed directly as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    unittest.main()