[API, Feature] Support response prefill for OpenAI API (#1490)

2024-09-22 06:46:17 -07:00
parent 39bb49d156
commit e4780cf839
4 changed files with 74 additions and 2 deletions
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -858,11 +858,18 @@ def v1_chat_generate_request(
                                openai_compatible_messages.append(
                                    {"role": message.role, "content": content["text"]}
                                )
+                if openai_compatible_messages[-1]["role"] == "assistant":
+                    assistant_prefix = openai_compatible_messages[-1]["content"]
+                    openai_compatible_messages = openai_compatible_messages[:-1]
+                else:
+                    assistant_prefix = None
                prompt_ids = tokenizer_manager.tokenizer.apply_chat_template(
                    openai_compatible_messages,
                    tokenize=True,
                    add_generation_prompt=True,
                )
+                if assistant_prefix:
+                    prompt_ids += tokenizer_manager.tokenizer.encode(assistant_prefix)
                stop = request.stop
                image_data = None
                modalities = []