Files
sglang/test/srt/openai_server/features/test_enable_thinking.py
2025-07-27 23:35:20 -07:00

244 lines
8.9 KiB
Python

"""
Usage:
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
import asyncio
import json
import os
import sys
import time
import unittest
import openai
import requests
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestEnableThinking(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-1234"
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=[
"--reasoning-parser",
"qwen3",
],
)
cls.additional_chat_kwargs = {}
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_chat_completion_with_reasoning(self):
# Test non-streaming with "enable_thinking": True, reasoning_content should not be empty
client = requests.post(
f"{self.base_url}/v1/chat/completions",
headers={"Authorization": f"Bearer {self.api_key}"},
json={
"model": self.model,
"messages": [{"role": "user", "content": "Hello"}],
"temperature": 0,
"separate_reasoning": True,
"chat_template_kwargs": {"enable_thinking": True},
**self.additional_chat_kwargs,
},
)
self.assertEqual(client.status_code, 200, f"Failed with: {client.text}")
data = client.json()
self.assertIn("choices", data)
self.assertTrue(len(data["choices"]) > 0)
self.assertIn("message", data["choices"][0])
self.assertIn("reasoning_content", data["choices"][0]["message"])
self.assertIsNotNone(data["choices"][0]["message"]["reasoning_content"])
def test_chat_completion_without_reasoning(self):
# Test non-streaming with "enable_thinking": False, reasoning_content should be empty
client = requests.post(
f"{self.base_url}/v1/chat/completions",
headers={"Authorization": f"Bearer {self.api_key}"},
json={
"model": self.model,
"messages": [{"role": "user", "content": "Hello"}],
"temperature": 0,
"separate_reasoning": True,
"chat_template_kwargs": {"enable_thinking": False},
**self.additional_chat_kwargs,
},
)
self.assertEqual(client.status_code, 200, f"Failed with: {client.text}")
data = client.json()
self.assertIn("choices", data)
self.assertTrue(len(data["choices"]) > 0)
self.assertIn("message", data["choices"][0])
if "reasoning_content" in data["choices"][0]["message"]:
self.assertIsNone(data["choices"][0]["message"]["reasoning_content"])
def test_stream_chat_completion_with_reasoning(self):
# Test streaming with "enable_thinking": True, reasoning_content should not be empty
response = requests.post(
f"{self.base_url}/v1/chat/completions",
headers={"Authorization": f"Bearer {self.api_key}"},
json={
"model": self.model,
"messages": [{"role": "user", "content": "Hello"}],
"temperature": 0,
"separate_reasoning": True,
"stream": True,
"chat_template_kwargs": {"enable_thinking": True},
**self.additional_chat_kwargs,
},
stream=True,
)
self.assertEqual(response.status_code, 200, f"Failed with: {response.text}")
has_reasoning = False
has_content = False
print("\n=== Stream With Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")
if line.startswith("data:") and not line.startswith("data: [DONE]"):
data = json.loads(line[6:])
if "choices" in data and len(data["choices"]) > 0:
delta = data["choices"][0].get("delta", {})
if "reasoning_content" in delta and delta["reasoning_content"]:
has_reasoning = True
if "content" in delta and delta["content"]:
has_content = True
self.assertTrue(
has_reasoning,
"The reasoning content is not included in the stream response",
)
self.assertTrue(
has_content, "The stream response does not contain normal content"
)
def test_stream_chat_completion_without_reasoning(self):
# Test streaming with "enable_thinking": False, reasoning_content should be empty
response = requests.post(
f"{self.base_url}/v1/chat/completions",
headers={"Authorization": f"Bearer {self.api_key}"},
json={
"model": self.model,
"messages": [{"role": "user", "content": "Hello"}],
"temperature": 0,
"separate_reasoning": True,
"stream": True,
"chat_template_kwargs": {"enable_thinking": False},
**self.additional_chat_kwargs,
},
stream=True,
)
self.assertEqual(response.status_code, 200, f"Failed with: {response.text}")
has_reasoning = False
has_content = False
print("\n=== Stream Without Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")
if line.startswith("data:") and not line.startswith("data: [DONE]"):
data = json.loads(line[6:])
if "choices" in data and len(data["choices"]) > 0:
delta = data["choices"][0].get("delta", {})
if "reasoning_content" in delta and delta["reasoning_content"]:
has_reasoning = True
if "content" in delta and delta["content"]:
has_content = True
self.assertFalse(
has_reasoning,
"The reasoning content should not be included in the stream response",
)
self.assertTrue(
has_content, "The stream response does not contain normal content"
)
# Skip for ci test
# class TestGLM45EnableThinking(TestEnableThinking):
# @classmethod
# def setUpClass(cls):
# # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
# cls.model = "THUDM/GLM-4.5"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-1234"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# api_key=cls.api_key,
# other_args=[
# "--tool-call-parser",
# "glm45",
# "--reasoning-parser",
# "glm45",
# "--tp-size",
# "8"
# ],
# )
# # Validate whether enable-thinking conflict with tool_calls
# cls.additional_chat_kwargs = {
# "tools": [
# {
# "type": "function",
# "function": {
# "name": "add",
# "description": "Compute the sum of two numbers",
# "parameters": {
# "type": "object",
# "properties": {
# "a": {
# "type": "int",
# "description": "A number",
# },
# "b": {
# "type": "int",
# "description": "A number",
# },
# },
# "required": ["a", "b"],
# },
# },
# }
# ]
# }
if __name__ == "__main__":
unittest.main()