feat: add thinking_budget (#6089)

2025-05-09 23:22:09 +08:00
parent dff0ab92eb
commit 63484f9fd6
9 changed files with 196 additions and 5 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -61,6 +61,7 @@ suites = {
        TestFile("test_radix_attention.py", 167),
        TestFile("test_reasoning_content.py", 89),
        TestFile("test_enable_thinking.py", 70),
+        TestFile("test_thinking_budget.py", 60),
        TestFile("test_regex_constrained.py", 64),
        TestFile("test_release_memory_occupation.py", 44),
        TestFile("test_request_length_validation.py", 31),
--- a/test/srt/test_thinking_budget.py
+++ b/test/srt/test_thinking_budget.py
@@ -0,0 +1,95 @@
+"""
+Usage:
+python3 -m unittest test_thinking_budget.TestThinkingBudget.test_chat_completion_with_thinking_budget_20
+python3 -m unittest test_thinking_budget.TestThinkingBudget.test_chat_completion_with_thinking_budget_200
+"""
+
+import unittest
+
+import requests
+from transformers import AutoTokenizer
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    popen_launch_server,
+)
+
+
+class TestThinkingBudget(CustomTestCase):
+    """End-to-end checks that reasoning length matches `thinking_budget`.
+
+    Each test re-tokenizes `reasoning_content` and compares it to the budget.
+    """
+
+    # Per-request timeout (s): a stalled server fails fast instead of hanging CI.
+    request_timeout = 300
+
+    @classmethod
+    def setUpClass(cls):
+        """Load the tokenizer and launch the server with the qwen3 parser."""
+        cls.model = "Qwen/Qwen3-8B"
+        cls.tokenizer = AutoTokenizer.from_pretrained(cls.model)
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.api_key = "sk-1234"
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            api_key=cls.api_key,
+            other_args=[
+                "--reasoning-parser",
+                "qwen3",
+            ],
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        """Kill the process tree of the server launched in `setUpClass`."""
+        kill_process_tree(cls.process.pid)
+
+    def _assert_reasoning_length(self, thinking_budget):
+        """Send one chat completion and check the reasoning token count.
+
+        `thinking_budget` is sent in the request body and is also the exact
+        token count expected for the returned `reasoning_content`.
+        """
+        response = requests.post(
+            f"{self.base_url}/v1/chat/completions",
+            headers={"Authorization": f"Bearer {self.api_key}"},
+            json={
+                "model": self.model,
+                "messages": [
+                    {"role": "user", "content": "9.11 and 9.8, which is greater?"}
+                ],
+                "temperature": 0,
+                "separate_reasoning": True,
+                "chat_template_kwargs": {"enable_thinking": True},
+                "thinking_budget": thinking_budget,
+            },
+            timeout=self.request_timeout,
+        )
+        self.assertEqual(response.status_code, 200, f"Failed with: {response.text}")
+        data = response.json()
+        reasoning_content = data["choices"][0]["message"]["reasoning_content"]
+        # Count content tokens only; never let the tokenizer add special tokens.
+        tokens = self.tokenizer.encode(reasoning_content, add_special_tokens=False)
+        self.assertEqual(
+            len(tokens),
+            thinking_budget,
+            f"Reasoning content length: {len(tokens)} not equal to {thinking_budget}, tokens: {tokens}, reasoning_content: {reasoning_content}",
+        )
+
+    def test_chat_completion_with_thinking_budget_20(self):
+        """A budget of 20 must produce exactly 20 reasoning tokens."""
+        self._assert_reasoning_length(20)
+
+    def test_chat_completion_with_thinking_budget_200(self):
+        """A budget of 200 must produce exactly 200 reasoning tokens."""
+        self._assert_reasoning_length(200)
+
+
+if __name__ == "__main__":
+    unittest.main()  # Run this module's tests when executed as a script.