[PD] Fix generate endpoint of min_lb for PD (#5598)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -187,11 +187,11 @@ async def handle_generate_request(request_data: dict):
|
|||||||
|
|
||||||
if request_data.get("stream", False):
|
if request_data.get("stream", False):
|
||||||
return await load_balancer.generate_stream(
|
return await load_balancer.generate_stream(
|
||||||
modified_request, prefill_server, decode_server
|
modified_request, prefill_server, decode_server, "generate"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return await load_balancer.generate(
|
return await load_balancer.generate(
|
||||||
modified_request, prefill_server, decode_server
|
modified_request, prefill_server, decode_server, "generate"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user