Feat: Implement JSON Mode (response_format.type="json_object") (#4733)
Co-authored-by: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
This commit is contained in:
@@ -158,6 +158,7 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
|
|||||||
def dispatch_json(self, key_string: str) -> Optional[XGrammarGrammar]:
|
def dispatch_json(self, key_string: str) -> Optional[XGrammarGrammar]:
|
||||||
try:
|
try:
|
||||||
if key_string == "$$ANY$$":
|
if key_string == "$$ANY$$":
|
||||||
|
# Note: This builtin JSON grammar includes *all* valid JSON (including, for example, arrays at the root)
|
||||||
ctx = self.grammar_compiler.compile_builtin_json_grammar()
|
ctx = self.grammar_compiler.compile_builtin_json_grammar()
|
||||||
else:
|
else:
|
||||||
ctx = self.grammar_compiler.compile_json_schema(schema=key_string)
|
ctx = self.grammar_compiler.compile_json_schema(schema=key_string)
|
||||||
|
|||||||
@@ -1105,6 +1105,8 @@ def v1_chat_generate_request(
|
|||||||
sampling_params["json_schema"] = convert_json_schema_to_str(
|
sampling_params["json_schema"] = convert_json_schema_to_str(
|
||||||
request.response_format.json_schema.schema_
|
request.response_format.json_schema.schema_
|
||||||
)
|
)
|
||||||
|
elif request.response_format and request.response_format.type == "json_object":
|
||||||
|
sampling_params["json_schema"] = '{"type": "object"}'
|
||||||
elif (
|
elif (
|
||||||
request.response_format and request.response_format.type == "structural_tag"
|
request.response_format and request.response_format.type == "structural_tag"
|
||||||
):
|
):
|
||||||
|
|||||||
137
test/srt/test_json_mode.py
Normal file
137
test/srt/test_json_mode.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
"""
|
||||||
|
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_response
|
||||||
|
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
|
||||||
|
|
||||||
|
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_response
|
||||||
|
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
|
||||||
|
|
||||||
|
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
|
||||||
|
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import openai
|
||||||
|
|
||||||
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_class(cls, backend):
    """Shared class-level fixture for the JSON-mode test cases.

    Starts a server through ``popen_launch_server`` with the requested
    grammar backend and stores the model name, base URL, server process
    handle and an OpenAI client on ``cls``.

    Args:
        cls: The test class that receives the fixture attributes.
        backend: Value passed to the ``--grammar-backend`` server flag.
    """
    cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
    cls.base_url = DEFAULT_URL_FOR_TEST

    server_flags = ["--max-running-requests", "10", "--grammar-backend", backend]
    cls.process = popen_launch_server(
        cls.model,
        cls.base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=server_flags,
    )

    # The client talks to the OpenAI-compatible endpoint under /v1.
    api_root = f"{cls.base_url}/v1"
    cls.client = openai.Client(api_key="EMPTY", base_url=api_root)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJSONModeOutlines(unittest.TestCase):
    """JSON-mode (``response_format={"type": "json_object"}``) tests, "outlines" backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "outlines")

    @classmethod
    def tearDownClass(cls):
        # Stop the server process started in setUpClass.
        kill_process_tree(cls.process.pid)

    def test_json_mode_response(self):
        """A non-streaming json_object request must return a JSON object."""
        # The prompts deliberately never mention JSON, so a passing result
        # cannot be attributed to the prompt itself.
        messages = [
            {
                "role": "system",
                "content": "You are a helpful AI assistant that gives a short answer.",
            },
            {"role": "user", "content": "What is the capital of Bulgaria?"},
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
        )
        text = completion.choices[0].message.content

        print(f"Response ({len(text)} characters): {text}")

        # The payload has to parse as JSON at all ...
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as e:
            self.fail(f"Response is not valid JSON. Error: {e}. Response: {text}")

        # ... and the root value has to be an object (a dict once parsed).
        self.assertIsInstance(parsed, dict, f"Response is not a JSON object: {text}")

    def test_json_mode_with_streaming(self):
        """A streamed json_object request must concatenate to a JSON object."""
        # The prompts deliberately never mention JSON, so a passing result
        # cannot be attributed to the prompt itself.
        messages = [
            {
                "role": "system",
                "content": "You are a helpful AI assistant that gives a short answer.",
            },
            {"role": "user", "content": "What is the capital of Bulgaria?"},
        ]
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
            stream=True,
        )

        # Gather every content-bearing delta in arrival order.
        pieces = []
        for chunk in stream:
            delta_text = chunk.choices[0].delta.content
            if delta_text is None:
                continue
            pieces.append(delta_text)
        full_response = "".join(pieces)

        print(
            f"Concatenated Response ({len(full_response)} characters): {full_response}"
        )

        # The reassembled text must parse as JSON with an object at the root.
        try:
            parsed = json.loads(full_response)
        except json.JSONDecodeError as e:
            self.fail(
                f"Streamed response is not valid JSON. Error: {e}. Response: {full_response}"
            )

        self.assertIsInstance(parsed, dict)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJSONModeXGrammar(TestJSONModeOutlines):
    """Runs the inherited JSON-mode tests with the "xgrammar" grammar backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "xgrammar")
|
||||||
|
|
||||||
|
|
||||||
|
class TestJSONModeLLGuidance(TestJSONModeOutlines):
    """Runs the inherited JSON-mode tests with the "llguidance" grammar backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "llguidance")
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user