"""
Usage:
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeOutlines.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming

python3 -m unittest openai_server.features.test_json_mode.TestJSONModeXGrammar.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming

python3 -m unittest openai_server.features.test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
"""
import json
import unittest
import openai
from sglang . srt . utils import kill_process_tree
from sglang . test . test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST ,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH ,
DEFAULT_URL_FOR_TEST ,
popen_launch_server ,
)
def setup_class(cls, backend):
    """Launch a local server with the given grammar backend and attach an OpenAI client.

    Shared by all JSON-mode TestCase classes below; each passes itself as *cls*
    so the launched process and client become class attributes.

    Args:
        cls: The unittest.TestCase subclass being configured.
        backend: Grammar backend name passed to ``--grammar-backend``
            (e.g. "outlines", "xgrammar", "llguidance").
    """
    cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
    cls.base_url = DEFAULT_URL_FOR_TEST
    # Note: the literals below must have no stray whitespace — they are passed
    # verbatim as CLI arguments to the server process.
    other_args = [
        "--max-running-requests",
        "10",
        "--grammar-backend",
        backend,
    ]
    cls.process = popen_launch_server(
        cls.model,
        cls.base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=other_args,
    )
    # Local servers ignore the API key, but the OpenAI client requires one.
    cls.client = openai.Client(api_key="EMPTY", base_url=f"{cls.base_url}/v1")
class TestJSONModeOutlines(unittest.TestCase):
    """JSON-mode (``response_format={"type": "json_object"}``) tests against
    the outlines grammar backend. Subclasses reuse these tests with other
    backends by overriding setUpClass."""

    @classmethod
    def setUpClass(cls):
        setup_class(cls, "outlines")

    @classmethod
    def tearDownClass(cls):
        # popen_launch_server may spawn children; kill the whole tree.
        kill_process_tree(cls.process.pid)

    def test_json_mode_response(self):
        """Test that response_format json_object (also known as "json mode")
        produces valid JSON, even without a system prompt that mentions JSON."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                # We are deliberately omitting "That produces JSON" or similar
                # phrases from the system prompt so that we don't have
                # misleading test results.
                {
                    "role": "system",
                    "content": "You are a helpful AI assistant that gives a short answer.",
                },
                {"role": "user", "content": "What is the capital of Bulgaria?"},
            ],
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
        )
        text = response.choices[0].message.content
        print(f"Response ({len(text)} characters): {text}")
        # Verify the response is valid JSON
        try:
            js_obj = json.loads(text)
        except json.JSONDecodeError as e:
            self.fail(f"Response is not valid JSON. Error: {e}. Response: {text}")
        # Verify it's actually an object (dict)
        self.assertIsInstance(js_obj, dict, f"Response is not a JSON object: {text}")

    def test_json_mode_with_streaming(self):
        """Test that streaming with json_object response format (also known as
        "json mode") works correctly, even without a system prompt that
        mentions JSON."""
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=[
                # We are deliberately omitting "That produces JSON" or similar
                # phrases from the system prompt so that we don't have
                # misleading test results.
                {
                    "role": "system",
                    "content": "You are a helpful AI assistant that gives a short answer.",
                },
                {"role": "user", "content": "What is the capital of Bulgaria?"},
            ],
            temperature=0,
            max_tokens=128,
            response_format={"type": "json_object"},
            stream=True,
        )
        # Collect all content deltas; only the concatenation is valid JSON.
        chunks = []
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                chunks.append(chunk.choices[0].delta.content)
        full_response = "".join(chunks)
        print(
            f"Concatenated Response ({len(full_response)} characters): {full_response}"
        )
        # Verify the combined response is valid JSON
        try:
            js_obj = json.loads(full_response)
        except json.JSONDecodeError as e:
            self.fail(
                f"Streamed response is not valid JSON. Error: {e}. Response: {full_response}"
            )
        self.assertIsInstance(js_obj, dict)
class TestJSONModeXGrammar(TestJSONModeOutlines):
    """Re-run the inherited JSON-mode tests against the xgrammar backend."""

    @classmethod
    def setUpClass(cls):
        # The backend name must be exactly "xgrammar" — stray whitespace in
        # the literal would select a nonexistent backend.
        setup_class(cls, backend="xgrammar")
class TestJSONModeLLGuidance(TestJSONModeOutlines):
    """Re-run the inherited JSON-mode tests against the llguidance backend."""

    @classmethod
    def setUpClass(cls):
        # The backend name must be exactly "llguidance" — stray whitespace in
        # the literal would select a nonexistent backend.
        setup_class(cls, backend="llguidance")
if __name__ == "__main__":
    # The comparison string must be exactly "__main__" (no padding) or the
    # test runner would never be invoked when executing this file directly.
    unittest.main()