diff --git a/examples/runtime/openai_chat_with_response_prefill.py b/examples/runtime/openai_chat_with_response_prefill.py new file mode 100644 index 000000000..a856019b5 --- /dev/null +++ b/examples/runtime/openai_chat_with_response_prefill.py @@ -0,0 +1,34 @@ +""" +Usage: +python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 +python openai_chat_with_response_prefill.py +""" + +import openai +from openai import OpenAI + +client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY") + +response = client.chat.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct", + messages=[ + {"role": "system", "content": "You are a helpful AI assistant"}, + { + "role": "user", + "content": """ +Extract the name, size, price, and color from this product description as a JSON object: + + +The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices. 
+ +""", + }, + { + "role": "assistant", + "content": "{\n", + }, + ], + temperature=0, +) + +print(response.choices[0].message.content) diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index a4869f5cc..f4ebc5fa4 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -858,11 +858,18 @@ def v1_chat_generate_request( openai_compatible_messages.append( {"role": message.role, "content": content["text"]} ) + if openai_compatible_messages[-1]["role"] == "assistant": + assistant_prefix = openai_compatible_messages[-1]["content"] + openai_compatible_messages = openai_compatible_messages[:-1] + else: + assistant_prefix = None prompt_ids = tokenizer_manager.tokenizer.apply_chat_template( openai_compatible_messages, tokenize=True, add_generation_prompt=True, ) + if assistant_prefix: + prompt_ids += tokenizer_manager.tokenizer.encode(assistant_prefix) stop = request.stop image_data = None modalities = [] diff --git a/test/srt/test_large_max_new_tokens.py b/test/srt/test_large_max_new_tokens.py index 10b82706a..8b493f9bd 100644 --- a/test/srt/test_large_max_new_tokens.py +++ b/test/srt/test_large_max_new_tokens.py @@ -27,7 +27,7 @@ class TestOpenAIServer(unittest.TestCase): cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, - other_args=("--max-total-token", "1024"), + other_args=("--max-total-token", "1024", "--context-len", "8192"), env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ}, return_stdout_stderr=True, ) diff --git a/test/srt/test_openai_server.py b/test/srt/test_openai_server.py index 87d85c0cd..d92a9de96 100644 --- a/test/srt/test_openai_server.py +++ b/test/srt/test_openai_server.py @@ -445,7 +445,7 @@ class TestOpenAIServer(unittest.TestCase): for mode in ["completion", "chat"]: self.run_batch(mode) - def test_calcel_batch(self): + def test_cancel_batch(self): for mode in ["completion", "chat"]: self.run_cancel_batch(mode) @@ -495,6 +495,37 
@@ class TestOpenAIServer(unittest.TestCase): text = response.choices[0].message.content assert isinstance(text, str) + def test_response_prefill(self): + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + response = client.chat.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct", + messages=[ + {"role": "system", "content": "You are a helpful AI assistant"}, + { + "role": "user", + "content": """ +Extract the name, size, price, and color from this product description as a JSON object: + + +The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices. + +""", + }, + { + "role": "assistant", + "content": "{\n", + }, + ], + temperature=0, + ) + + assert ( + response.choices[0] + .message.content.strip() + .startswith('"name": "SmartHome Mini",') + ) + if __name__ == "__main__": unittest.main()