[Feature] Prefill assistant response - add continue_final_message parameter (#4226)

Co-authored-by: Chayenne <zhaochen20@outlook.com>
This commit is contained in:
Adarsh Shirawalmath
2025-04-21 06:07:18 +05:30
committed by GitHub
parent 5156d5a413
commit 8b39274e34
6 changed files with 82 additions and 23 deletions

View File

@@ -1,33 +1,53 @@
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --port 30000
python openai_chat.py
1) Launch the server in one terminal:
python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --port 30000
2) Run this script in another terminal:
python openai_chat_with_response_prefill.py
This example demonstrates two chat completion calls:
- One with continue_final_message enabled (the final assistant message is used as a prefill).
- One without continue_final_message (the final assistant message remains, starting a new turn).
"""
import openai
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
response = client.chat.completions.create(
model="meta-llama/Llama-3.1-8B-Instruct",
messages=[
{"role": "system", "content": "You are a helpful AI assistant"},
{
"role": "user",
"content": """
messages = [
{"role": "system", "content": "You are a helpful AI assistant."},
{
"role": "user",
"content": """
Extract the name, size, price, and color from this product description as a JSON object:
<description>
The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99. At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—no matter where you place it in your home. This affordable little hub brings convenient hands-free control to your smart devices.
The SmartHome Mini is a compact smart home assistant available in black or white for only $49.99.
At just 5 inches wide, it lets you control lights, thermostats, and other connected devices via voice or app—
no matter where you place it in your home.
This affordable little hub brings convenient hands-free control to your smart devices.
</description>
""",
},
{
"role": "assistant",
"content": "{\n",
},
],
},
{"role": "assistant", "content": "{\n"},
]
# Call the API with continue_final_message enabled, so the trailing assistant
# message in `messages` is used as a prefill for the reply. The flag is sent
# through extra_body, i.e. as an additional field of the request body.
print("=== Prefill with continue_final_message ===")
response_with = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=messages,
    temperature=0,  # temperature 0 keeps the two demo outputs comparable
    extra_body={"continue_final_message": True},
)
print(response_with.choices[0].message.content)
# Calling the API without continue_final_message (using default behavior).
# This request omits extra_body; per the usage notes at the top of this script,
# the final assistant message then remains in place and a new turn is started.
print("\n=== Prefill without continue_final_message ===")
response_without = client.chat.completions.create(
model="meta-llama/Llama-3.1-8B-Instruct",
messages=messages,
temperature=0,
)
# NOTE(review): `response` is only assigned by the pre-change version of this
# script; this line looks like the removed side of the diff - confirm it is not
# meant to run alongside the `response_without` print below.
print(response.choices[0].message.content)
print(response_without.choices[0].message.content)