Fix constrained decoding (#1634)

2024-10-11 06:26:20 -07:00
parent 81c3327402
commit 5d09ca5735
2 changed files with 22 additions and 4 deletions
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -810,6 +810,8 @@ class ScheduleBatch:
            self.sampling_info.regex_fsm_states = [
                req.regex_fsm_state for req in self.reqs
            ]
        else:
            self.sampling_info.regex_fsms = None
        return ModelWorkerBatch(
            forward_mode=self.forward_mode,
--- a/test/srt/test_json_constrained.py
+++ b/test/srt/test_json_constrained.py
@@ -1,5 +1,6 @@
 import json
 import unittest
 from concurrent.futures import ThreadPoolExecutor
 import openai
 import requests
@@ -27,13 +28,18 @@ class TestJSONConstrained(unittest.TestCase):
                "required": ["name", "population"],
            }
        )
-        cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
+        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=["--max-running-requests", "10"],
        )
    @classmethod
    def tearDownClass(cls):
        """Clean up after the test class by passing the PID of ``cls.process``
        (the handle returned by ``popen_launch_server``) to ``kill_child_process``.
        """
        kill_child_process(cls.process.pid)
-    def run_decode(self, return_logprob=False, top_logprobs_num=0, n=1):
+    def run_decode(self, json_schema, return_logprob=False, top_logprobs_num=0, n=1):
        response = requests.post(
            self.base_url + "/generate",
            json={
@@ -43,7 +49,7 @@ class TestJSONConstrained(unittest.TestCase):
                    "max_new_tokens": 128,
                    "n": n,
                    "stop_token_ids": [119690],
-                    "json_schema": self.json_schema,
+                    "json_schema": json_schema,
                },
                "stream": False,
                "return_logprob": return_logprob,
@@ -53,6 +59,10 @@ class TestJSONConstrained(unittest.TestCase):
        )
        print(json.dumps(response.json()))
        print("=" * 100)
        if not json_schema:
            return
        try:
            js_obj = json.loads(response.json()["text"])
        except (TypeError, json.decoder.JSONDecodeError):
@@ -61,7 +71,7 @@ class TestJSONConstrained(unittest.TestCase):
        assert isinstance(js_obj["population"], int)
    def test_json_generate(self):
-        self.run_decode()
+        self.run_decode(json_schema=self.json_schema)
    def test_json_openai(self):
        client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1")
@@ -89,6 +99,12 @@ class TestJSONConstrained(unittest.TestCase):
        assert isinstance(js_obj["name"], str)
        assert isinstance(js_obj["population"], int)
    def test_mix_json_and_other(self):
        """Issue schema-constrained and unconstrained requests concurrently.

        Builds 40 requests: 20 with ``json_schema=None`` and 20 with
        ``self.json_schema``. ``run_decode`` returns right after printing the
        response when the schema is falsy, so only the constrained responses
        are parsed and type-checked.

        NOTE(review): presumably this is the regression test for resetting
        ``sampling_info.regex_fsms`` to ``None`` when a batch has no
        constrained request -- confirm against the scheduler change.
        """
        json_schemas = [None, None, self.json_schema, self.json_schema] * 10
        # One worker thread per request, so no request waits for a free thread.
        with ThreadPoolExecutor(len(json_schemas)) as executor:
            # executor.map is lazy about results; list() drains it so that an
            # exception raised inside any run_decode call is re-raised here.
            list(executor.map(self.run_decode, json_schemas))
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()