Support LoRA in bench_serving oai interface (#11318)
This commit is contained in:
@@ -209,6 +209,11 @@ async def async_request_openai_completions(
             **request_func_input.extra_request_body,
         }

+        # hack to accommodate different LoRA conventions between SGLang and vLLM.
+        if request_func_input.lora_name:
+            payload["model"] = request_func_input.lora_name
+            payload["lora_path"] = request_func_input.lora_name
+
         if request_func_input.image_data:
             payload.update({"image_data": request_func_input.image_data})

@@ -326,6 +331,12 @@ async def async_request_openai_chat_completions(
             "stream": not args.disable_stream,
             **request_func_input.extra_request_body,
         }
+
+        # hack to accommodate different LoRA conventions between SGLang and vLLM.
+        if request_func_input.lora_name:
+            payload["model"] = request_func_input.lora_name
+            payload["lora_path"] = request_func_input.lora_name
+
         headers = get_auth_headers()

         output = RequestFuncOutput.init_new(request_func_input)
Reference in New Issue
Block a user