Files
sglang/examples/runtime/lora.py

38 lines
1.1 KiB
Python

# launch server
# python -m sglang.launch_server --model mistralai/Mistral-7B-Instruct-v0.3 --lora-paths /home/ying/test_lora /home/ying/test_lora_1 /home/ying/test_lora_2 lora3=/home/ying/test_lora_3 lora4=/home/ying/test_lora_4 --disable-radix --disable-cuda-graph --max-loras-per-batch 4
# send requests
# lora_path[i] specifies the LoRA adapter used for text[i], so the two lists must have the same length.
# Use None for a base-model-only prompt, e.g. "lora_path": [None, "/home/ying/test_lora"]
import json
import requests
# Base URL of the server started with the launch command at the top of this file.
url = "http://127.0.0.1:30000"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever if the server is not running or stops responding.
timeout = (10, 300)

# Batched /generate payload: one prompt per entry in "text", and the adapter
# to use for that prompt at the same index in "lora_path". Adapters are
# referred to either by the path or by the name (e.g. "lora3") given to
# --lora-paths when launching the server.
json_data = {
    "text": [
        "prompt 1",
        "prompt 2",
        "prompt 3",
        "prompt 4",
        "prompt 5",
        "prompt 6",
        "prompt 7",
    ],
    "sampling_params": {"max_new_tokens": 32},
    "lora_path": [
        "/home/ying/test_lora",
        "/home/ying/test_lora_1",
        "/home/ying/test_lora_2",
        "lora3",
        "lora4",
        "/home/ying/test_lora",
        "/home/ying/test_lora_1",
    ],
}

# lora_path[i] is paired with text[i]; catch a mismatch locally instead of
# sending a malformed batch to the server.
if len(json_data["text"]) != len(json_data["lora_path"]):
    raise ValueError(
        '"text" and "lora_path" must have the same length, got '
        f'{len(json_data["text"])} and {len(json_data["lora_path"])}'
    )

response = requests.post(
    url + "/generate",
    json=json_data,
    timeout=timeout,
)

# Report HTTP errors with the server's own message; an error body is not
# guaranteed to be JSON, so decoding it blindly would hide the real failure.
if not response.ok:
    raise SystemExit(
        f"Request failed with HTTP {response.status_code}: {response.text}"
    )

print(json.dumps(response.json()))