adapt to sglang v0.5.2rc1 on dcu

2025-09-04 15:56:33 +08:00
commit 909abb58f5
2320 changed files with 489411 additions and 0 deletions
--- a/examples/runtime/lora.py
+++ b/examples/runtime/lora.py
@@ -0,0 +1,37 @@
+# launch server
+# python -m sglang.launch_server --model mistralai/Mistral-7B-Instruct-v0.3 --lora-paths /home/ying/test_lora lora1=/home/ying/test_lora_1 lora2=/home/ying/test_lora_2 --disable-radix --disable-cuda-graph --max-loras-per-batch 4
+
+# send requests
+# lora_path[i] specifies the LoRA used for text[i], so make sure they have the same length
+# use None to specify base-only prompt, e.g. "lora_path": [None, "/home/ying/test_lora"]
+import json
+
+import requests
+
+url = "http://127.0.0.1:30000"  # local address where the server launched above is expected to listen
+json_data = {
+    "text": [  # batch of 7 prompts; entry i is paired with "lora_path"[i] below
+        "prompt 1",
+        "prompt 2",
+        "prompt 3",
+        "prompt 4",
+        "prompt 5",
+        "prompt 6",
+        "prompt 7",
+    ],
+    "sampling_params": {"max_new_tokens": 32},  # same sampling settings for every prompt in the batch
+    "lora_path": [  # one entry per prompt, so this list also has 7 items
+        "/home/ying/test_lora",  # adapter given by path (same path as in the launch command)
+        "lora1",  # adapter given by the name assigned as lora1=... in the launch command
+        "lora2",  # adapter given by the name assigned as lora2=... in the launch command
+        "lora1",
+        "lora2",
+        None,  # None selects the base model only (no LoRA) for this prompt
+        None,
+    ],
+}
+response = requests.post(  # single blocking POST for the whole batch; no timeout is set
+    url + "/generate",
+    json=json_data,  # requests encodes the dict as the JSON request body
+)
+print(json.dumps(response.json()))  # parse the JSON reply and print it re-serialized on one line