[API, Feature] Support response prefill for openai API (#1490)

2024-09-22 06:46:17 -07:00
parent 39bb49d156
commit e4780cf839
4 changed files with 74 additions and 2 deletions
--- a/examples/runtime/openai_chat_with_response_prefill.py
+++ b/examples/runtime/openai_chat_with_response_prefill.py
@@ -0,0 +1,34 @@
+"""
+Usage:
+python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000
+python openai_chat_with_response_prefill.py
+"""
+
+import openai
+from openai import OpenAI
+
+client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
+
+response = client.chat.completions.create(
+    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+    messages=[
+        {"role": "system", "content": "You are a helpful AI assistant"},
+        {
+            "role": "user",
+            "content": """
+Extract the name, size, price, and color from this product description as a JSON object:
+
+<description>
+The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices.
+</description>
+""",
+        },
+        {
+            "role": "assistant",
+            "content": "{\n",
+        },
+    ],
+    temperature=0,
+)
+
+print(response.choices[0].message.content)
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -858,11 +858,18 @@ def v1_chat_generate_request(
                                openai_compatible_messages.append(
                                    {"role": message.role, "content": content["text"]}
                                )
+                if openai_compatible_messages[-1]["role"] == "assistant":
+                    assistant_prefix = openai_compatible_messages[-1]["content"]
+                    openai_compatible_messages = openai_compatible_messages[:-1]
+                else:
+                    assistant_prefix = None
                prompt_ids = tokenizer_manager.tokenizer.apply_chat_template(
                    openai_compatible_messages,
                    tokenize=True,
                    add_generation_prompt=True,
                )
+                if assistant_prefix:
+                    prompt_ids += tokenizer_manager.tokenizer.encode(assistant_prefix)
                stop = request.stop
                image_data = None
                modalities = []
--- a/test/srt/test_large_max_new_tokens.py
+++ b/test/srt/test_large_max_new_tokens.py
@@ -27,7 +27,7 @@ class TestOpenAIServer(unittest.TestCase):
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
-            other_args=("--max-total-token", "1024"),
+            other_args=("--max-total-token", "1024", "--context-len", "8192"),
            env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ},
            return_stdout_stderr=True,
        )
--- a/test/srt/test_openai_server.py
+++ b/test/srt/test_openai_server.py
@@ -445,7 +445,7 @@ class TestOpenAIServer(unittest.TestCase):
        for mode in ["completion", "chat"]:
            self.run_batch(mode)

-    def test_calcel_batch(self):
+    def test_cancel_batch(self):
        for mode in ["completion", "chat"]:
            self.run_cancel_batch(mode)

@@ -495,6 +495,37 @@ class TestOpenAIServer(unittest.TestCase):
        text = response.choices[0].message.content
        assert isinstance(text, str)

+    def test_response_prefill(self):
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        response = client.chat.completions.create(
+            model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+            messages=[
+                {"role": "system", "content": "You are a helpful AI assistant"},
+                {
+                    "role": "user",
+                    "content": """
+Extract the name, size, price, and color from this product description as a JSON object:
+
+<description>
+The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices.
+</description>
+""",
+                },
+                {
+                    "role": "assistant",
+                    "content": "{\n",
+                },
+            ],
+            temperature=0,
+        )
+
+        assert (
+            response.choices[0]
+            .message.content.strip()
+            .startswith('"name": "SmartHome Mini",')
+        )
+

 if __name__ == "__main__":
    unittest.main()