[PD] Support structured output (#6560)
This commit is contained in:
New file: scripts/playground/disaggregation/cli-so.py (+34 lines, @@ -0,0 +1,34 @@)
|
||||
|
||||
Reference in New Issue
Block a user