refactor(test): reorganize OpenAI test file structure (#7408)

2025-06-21 19:37:48 -07:00
parent 1998ce4046
commit b7a2df0a44
27 changed files with 350 additions and 294 deletions
--- a/test/srt/openai_server/features/test_enable_thinking.py
+++ b/test/srt/openai_server/features/test_enable_thinking.py
@@ -0,0 +1,188 @@
+"""
+Usage (run from the test/srt directory):
+python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
+python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
+python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
+python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
+"""
+
+import asyncio
+import json
+import os
+import sys
+import time
+import unittest
+
+import openai
+import requests
+
+from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+    DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    popen_launch_server,
+)
+
+
+class TestEnableThinking(CustomTestCase):
+    """End-to-end checks of the ``enable_thinking`` chat-template switch.
+
+    A single server is launched for the whole class with
+    ``--reasoning-parser qwen3``. Every test posts the same "Hello" prompt to
+    ``/v1/chat/completions`` with ``separate_reasoning`` enabled and asserts
+    whether ``reasoning_content`` shows up, for both non-streaming and
+    streaming responses.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Launch the server shared by every test in this class."""
+        cls.model = DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.api_key = "sk-1234"
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            api_key=cls.api_key,
+            other_args=[
+                "--reasoning-parser",
+                "qwen3",
+            ],
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        """Stop the server process started in ``setUpClass``."""
+        kill_process_tree(cls.process.pid)
+
+    def _post_chat_completion(self, enable_thinking, *, stream=False):
+        """POST the fixed "Hello" chat request and return the response.
+
+        Args:
+            enable_thinking: Value sent as
+                ``chat_template_kwargs.enable_thinking``.
+            stream: When true, ask the server for a streamed completion and
+                leave the HTTP body unread so the caller can iterate over it.
+
+        Returns:
+            The ``requests`` response object. For ``stream=True`` the caller
+            is responsible for closing it (use it as a context manager).
+        """
+        payload = {
+            "model": self.model,
+            "messages": [{"role": "user", "content": "Hello"}],
+            "temperature": 0,
+            "separate_reasoning": True,
+            "chat_template_kwargs": {"enable_thinking": enable_thinking},
+        }
+        if stream:
+            payload["stream"] = True
+        return requests.post(
+            f"{self.base_url}/v1/chat/completions",
+            headers={"Authorization": f"Bearer {self.api_key}"},
+            json=payload,
+            stream=stream,
+        )
+
+    def _assert_ok(self, response):
+        """Fail the test, quoting the response body, unless the status is 200."""
+        # Only touch ``response.text`` on failure: building the message eagerly
+        # would buffer the whole body of a streamed response before any line
+        # has been iterated.
+        if response.status_code != 200:
+            self.fail(f"{response.status_code} != 200 : Failed with: {response.text}")
+
+    def _first_message(self, response):
+        """Validate a non-streaming response and return its first message dict."""
+        self._assert_ok(response)
+        data = response.json()
+
+        self.assertIn("choices", data)
+        self.assertGreater(len(data["choices"]), 0)
+        self.assertIn("message", data["choices"][0])
+        return data["choices"][0]["message"]
+
+    def _scan_stream(self, response):
+        """Walk the ``data:`` lines of a streamed response.
+
+        Returns:
+            A ``(has_reasoning, has_content)`` tuple: whether any delta of the
+            first choice carried a non-empty ``reasoning_content`` and whether
+            any carried a non-empty ``content``.
+        """
+        prefix = "data:"
+        has_reasoning = False
+        has_content = False
+
+        for raw_line in response.iter_lines():
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8")
+            if not line.startswith(prefix):
+                continue
+            # Strip the prefix by its own length; the space after the colon
+            # is optional, so a fixed 6-character slice could eat payload.
+            body = line[len(prefix) :].strip()
+            if body.startswith("[DONE]"):
+                continue
+
+            choices = json.loads(body).get("choices")
+            if not choices:
+                continue
+            delta = choices[0].get("delta") or {}
+
+            if delta.get("reasoning_content"):
+                has_reasoning = True
+            if delta.get("content"):
+                has_content = True
+
+        return has_reasoning, has_content
+
+    def test_chat_completion_with_reasoning(self):
+        # Test non-streaming with "enable_thinking": True, reasoning_content should not be empty
+        response = self._post_chat_completion(enable_thinking=True)
+        message = self._first_message(response)
+
+        self.assertIn("reasoning_content", message)
+        # Truthiness (not just "is not None") so an empty string also fails,
+        # matching what the streaming variant of this test requires.
+        self.assertTrue(
+            message["reasoning_content"],
+            "The reasoning content is empty in the non-stream response",
+        )
+
+    def test_chat_completion_without_reasoning(self):
+        # Test non-streaming with "enable_thinking": False, reasoning_content should be empty
+        response = self._post_chat_completion(enable_thinking=False)
+        message = self._first_message(response)
+
+        # The key may be omitted entirely; when present it must be null.
+        if "reasoning_content" in message:
+            self.assertIsNone(message["reasoning_content"])
+
+    def test_stream_chat_completion_with_reasoning(self):
+        # Test streaming with "enable_thinking": True, reasoning_content should not be empty
+        # The context manager releases the connection even if an assertion fails.
+        with self._post_chat_completion(enable_thinking=True, stream=True) as response:
+            self._assert_ok(response)
+
+            print("\n=== Stream With Reasoning ===")
+            has_reasoning, has_content = self._scan_stream(response)
+
+        self.assertTrue(
+            has_reasoning,
+            "The reasoning content is not included in the stream response",
+        )
+        self.assertTrue(
+            has_content, "The stream response does not contain normal content"
+        )
+
+    def test_stream_chat_completion_without_reasoning(self):
+        # Test streaming with "enable_thinking": False, reasoning_content should be empty
+        # The context manager releases the connection even if an assertion fails.
+        with self._post_chat_completion(enable_thinking=False, stream=True) as response:
+            self._assert_ok(response)
+
+            print("\n=== Stream Without Reasoning ===")
+            has_reasoning, has_content = self._scan_stream(response)
+
+        self.assertFalse(
+            has_reasoning,
+            "The reasoning content should not be included in the stream response",
+        )
+        self.assertTrue(
+            has_content, "The stream response does not contain normal content"
+        )
+
+
+# Allow running this file directly as a script; hands control to the unittest runner.
+if __name__ == "__main__":
+    unittest.main()