[Feature] Support LoRA path renaming and add LoRA serving benchmarks (#1433)

2024-09-15 12:46:04 -07:00
parent 899cf5c438
commit 37963394aa
6 changed files with 594 additions and 62 deletions
--- a/examples/runtime/lora.py
+++ b/examples/runtime/lora.py
@@ -0,0 +1,37 @@
+# launch server
+# python -m sglang.launch_server --model mistralai/Mistral-7B-Instruct-v0.3 --lora-paths /home/ying/test_lora /home/ying/test_lora_1 /home/ying/test_lora_2 lora3=/home/ying/test_lora_3 lora4=/home/ying/test_lora_4 --disable-radix --disable-cuda-graph --max-loras-per-batch 4
+
+# send requests
+# lora_path[i] specifies the LoRA used for text[i], so make sure they have the same length
+# use None to specify a base-only prompt, e.g. "lora_path": [None, "/home/ying/test_lora"]
+import json
+
+import requests
+
+url = "http://127.0.0.1:30000"
+json_data = {
+    "text": [
+        "prompt 1",
+        "prompt 2",
+        "prompt 3",
+        "prompt 4",
+        "prompt 5",
+        "prompt 6",
+        "prompt 7",
+    ],
+    "sampling_params": {"max_new_tokens": 32},
+    "lora_path": [
+        "/home/ying/test_lora",
+        "/home/ying/test_lora_1",
+        "/home/ying/test_lora_2",
+        "lora3",
+        "lora4",
+        "/home/ying/test_lora",
+        "/home/ying/test_lora_1",
+    ],
+}
+response = requests.post(
+    url + "/generate",
+    json=json_data,
+)
+print(json.dumps(response.json()))