[PD] Fix generate endpoint of min_lb for PD (#5598)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -187,11 +187,11 @@ async def handle_generate_request(request_data: dict):
|
||||
|
||||
     if request_data.get("stream", False):
         return await load_balancer.generate_stream(
-            modified_request, prefill_server, decode_server
+            modified_request, prefill_server, decode_server, "generate"
         )
     else:
         return await load_balancer.generate(
-            modified_request, prefill_server, decode_server
+            modified_request, prefill_server, decode_server, "generate"
         )
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user