Feat: Implement JSON Mode (response_format.type="json_object") (#4733)
Co-authored-by: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
This commit is contained in:
@@ -158,6 +158,7 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
|
||||
def dispatch_json(self, key_string: str) -> Optional[XGrammarGrammar]:
|
||||
try:
|
||||
if key_string == "$$ANY$$":
|
||||
# Note: This builtin JSON grammar includes *all* valid JSON (including, for example, arrays at the root)
|
||||
ctx = self.grammar_compiler.compile_builtin_json_grammar()
|
||||
else:
|
||||
ctx = self.grammar_compiler.compile_json_schema(schema=key_string)
|
||||
|
||||
@@ -1105,6 +1105,8 @@ def v1_chat_generate_request(
|
||||
sampling_params["json_schema"] = convert_json_schema_to_str(
|
||||
request.response_format.json_schema.schema_
|
||||
)
|
||||
elif request.response_format and request.response_format.type == "json_object":
|
||||
sampling_params["json_schema"] = '{"type": "object"}'
|
||||
elif (
|
||||
request.response_format and request.response_format.type == "structural_tag"
|
||||
):
|
||||
|
||||
137
test/srt/test_json_mode.py
Normal file
137
test/srt/test_json_mode.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_response
|
||||
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
|
||||
|
||||
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_response
|
||||
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
|
||||
|
||||
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
|
||||
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
|
||||
"""
|
||||
|
||||
import json
|
||||
import unittest
|
||||
|
||||
import openai
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
popen_launch_server,
|
||||
)
|
||||
|
||||
|
||||
def setup_class(cls, backend):
    """Launch an sglang test server for *cls* and attach an OpenAI client.

    Intended to be called from a TestCase's ``setUpClass``. Sets
    ``cls.model``, ``cls.base_url``, ``cls.process`` (the server process)
    and ``cls.client`` (an OpenAI-compatible client pointed at the server).

    Args:
        cls: The TestCase class being configured.
        backend: Grammar backend name passed via ``--grammar-backend``
            (e.g. "outlines", "xgrammar", "llguidance").
    """
    cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
    cls.base_url = DEFAULT_URL_FOR_TEST

    # Keep the request concurrency small so the test server stays light.
    launch_args = [
        "--max-running-requests",
        "10",
        "--grammar-backend",
        backend,
    ]

    cls.process = popen_launch_server(
        cls.model,
        cls.base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=launch_args,
    )
    cls.client = openai.Client(api_key="EMPTY", base_url=f"{cls.base_url}/v1")
|
||||
|
||||
|
||||
class TestJSONModeOutlines(unittest.TestCase):
    """End-to-end tests for JSON mode (``response_format={"type": "json_object"}``)
    against an sglang server using the ``outlines`` grammar backend.

    Subclasses override :meth:`setUpClass` to exercise other grammar backends
    while reusing the same test bodies.
    """

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "outlines")

    @classmethod
    def tearDownClass(cls):
        # The launched server may spawn children; kill the whole process tree.
        kill_process_tree(cls.process.pid)

    def test_json_mode_response(self):
        """Test that response_format json_object (also known as "json mode") produces valid JSON, even without a system prompt that mentions JSON."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                # We are deliberately omitting "That produces JSON" or similar phrases from the assistant prompt so that we don't have misleading test results
                {
                    "role": "system",
                    "content": "You are a helpful AI assistant that gives a short answer.",
                },
                {"role": "user", "content": "What is the capital of Bulgaria?"},
            ],
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
        )
        text = response.choices[0].message.content

        # Guard against a missing message body: json.loads(None) raises
        # TypeError, which the JSONDecodeError handler below would not
        # catch, producing a confusing error instead of a clean failure.
        self.assertIsNotNone(text, "Response message content is None")

        print(f"Response ({len(text)} characters): {text}")

        # Verify the response is valid JSON
        try:
            js_obj = json.loads(text)
        except json.JSONDecodeError as e:
            self.fail(f"Response is not valid JSON. Error: {e}. Response: {text}")

        # Verify it's actually an object (dict)
        self.assertIsInstance(js_obj, dict, f"Response is not a JSON object: {text}")

    def test_json_mode_with_streaming(self):
        """Test that streaming with json_object response (also known as "json mode") format works correctly, even without a system prompt that mentions JSON."""
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=[
                # We are deliberately omitting "That produces JSON" or similar phrases from the assistant prompt so that we don't have misleading test results
                {
                    "role": "system",
                    "content": "You are a helpful AI assistant that gives a short answer.",
                },
                {"role": "user", "content": "What is the capital of Bulgaria?"},
            ],
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
            stream=True,
        )

        # Collect all chunks; deltas with content=None (e.g. the final
        # chunk) carry no text and are skipped.
        chunks = []
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                chunks.append(chunk.choices[0].delta.content)
        full_response = "".join(chunks)

        print(
            f"Concatenated Response ({len(full_response)} characters): {full_response}"
        )

        # Verify the combined response is valid JSON
        try:
            js_obj = json.loads(full_response)
        except json.JSONDecodeError as e:
            self.fail(
                f"Streamed response is not valid JSON. Error: {e}. Response: {full_response}"
            )

        self.assertIsInstance(js_obj, dict)
|
||||
|
||||
|
||||
class TestJSONModeXGrammar(TestJSONModeOutlines):
    """Run the inherited JSON-mode tests against the ``xgrammar`` backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "xgrammar")
|
||||
|
||||
|
||||
class TestJSONModeLLGuidance(TestJSONModeOutlines):
    """Run the inherited JSON-mode tests against the ``llguidance`` backend."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "llguidance")
|
||||
|
||||
|
||||
# Allow running this module directly, e.g.: python3 test_json_mode.py
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user