diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py
index 5e15bc744..400ab421f 100644
--- a/python/sglang/srt/constrained/xgrammar_backend.py
+++ b/python/sglang/srt/constrained/xgrammar_backend.py
@@ -158,6 +158,7 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
     def dispatch_json(self, key_string: str) -> Optional[XGrammarGrammar]:
         try:
             if key_string == "$$ANY$$":
+                # Note: This builtin JSON grammar includes *all* valid JSON (including, for example, arrays at the root)
                 ctx = self.grammar_compiler.compile_builtin_json_grammar()
             else:
                 ctx = self.grammar_compiler.compile_json_schema(schema=key_string)
diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py
index e1c120055..c9a3dbb92 100644
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -1105,6 +1105,8 @@ def v1_chat_generate_request(
             sampling_params["json_schema"] = convert_json_schema_to_str(
                 request.response_format.json_schema.schema_
             )
+        elif request.response_format and request.response_format.type == "json_object":
+            sampling_params["json_schema"] = '{"type": "object"}'
         elif (
             request.response_format and request.response_format.type == "structural_tag"
         ):
diff --git a/test/srt/test_json_mode.py b/test/srt/test_json_mode.py
new file mode 100644
index 000000000..30dadcdab
--- /dev/null
+++ b/test/srt/test_json_mode.py
@@ -0,0 +1,137 @@
+"""
+python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_response
+python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
+
+python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_response
+python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
+
+python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
+python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
+"""
+
+import json
+import unittest
+
+import openai
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    popen_launch_server,
+)
+
+
+def setup_class(cls, backend):
+    cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+    cls.base_url = DEFAULT_URL_FOR_TEST
+
+    other_args = [
+        "--max-running-requests",
+        "10",
+        "--grammar-backend",
+        backend,
+    ]
+
+    cls.process = popen_launch_server(
+        cls.model,
+        cls.base_url,
+        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+        other_args=other_args,
+    )
+    cls.client = openai.Client(api_key="EMPTY", base_url=f"{cls.base_url}/v1")
+
+
+class TestJSONModeOutlines(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines")
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_json_mode_response(self):
+        """Test that response_format json_object (also known as "json mode") produces valid JSON, even without a system prompt that mentions JSON."""
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                # We are deliberately omitting "That produces JSON" or similar phrases from the system prompt so that we don't have misleading test results
+                {
+                    "role": "system",
+                    "content": "You are a helpful AI assistant that gives a short answer.",
+                },
+                {"role": "user", "content": "What is the capital of Bulgaria?"},
+            ],
+            temperature=0,
+            max_tokens=128,
+            response_format={"type": "json_object"},
+        )
+        text = response.choices[0].message.content
+
+        print(f"Response ({len(text)} characters): {text}")
+
+        # Verify the response is valid JSON
+        try:
+            js_obj = json.loads(text)
+        except json.JSONDecodeError as e:
+            self.fail(f"Response is not valid JSON. Error: {e}. Response: {text}")
+
+        # Verify it's actually an object (dict)
+        self.assertIsInstance(js_obj, dict, f"Response is not a JSON object: {text}")
+
+    def test_json_mode_with_streaming(self):
+        """Test that streaming with response_format json_object (also known as "json mode") works correctly, even without a system prompt that mentions JSON."""
+        stream = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                # We are deliberately omitting "That produces JSON" or similar phrases from the system prompt so that we don't have misleading test results
+                {
+                    "role": "system",
+                    "content": "You are a helpful AI assistant that gives a short answer.",
+                },
+                {"role": "user", "content": "What is the capital of Bulgaria?"},
+            ],
+            temperature=0,
+            max_tokens=128,
+            response_format={"type": "json_object"},
+            stream=True,
+        )
+
+        # Collect all chunks
+        chunks = []
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                chunks.append(chunk.choices[0].delta.content)
+        full_response = "".join(chunks)
+
+        print(
+            f"Concatenated Response ({len(full_response)} characters): {full_response}"
+        )
+
+        # Verify the combined response is valid JSON
+        try:
+            js_obj = json.loads(full_response)
+        except json.JSONDecodeError as e:
+            self.fail(
+                f"Streamed response is not valid JSON. Error: {e}. Response: {full_response}"
+            )
+
+        self.assertIsInstance(js_obj, dict)
+
+
+class TestJSONModeXGrammar(TestJSONModeOutlines):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar")
+
+
+class TestJSONModeLLGuidance(TestJSONModeOutlines):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance")
+
+
+if __name__ == "__main__":
+    unittest.main()