From e9fd11c0d1f19fb9d928332061f3e72635d988f0 Mon Sep 17 00:00:00 2001 From: shangmingc Date: Wed, 28 May 2025 21:33:36 +0800 Subject: [PATCH] [Bugfix] Fix ChatCompletion endpoint of mini_lb when stream is set (#6703) Signed-off-by: Shangming Cai --- python/sglang/srt/disaggregation/mini_lb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/disaggregation/mini_lb.py b/python/sglang/srt/disaggregation/mini_lb.py index 2624e5939..c591b052f 100644 --- a/python/sglang/srt/disaggregation/mini_lb.py +++ b/python/sglang/srt/disaggregation/mini_lb.py @@ -117,8 +117,8 @@ class MiniLoadBalancer: ) as session: # Create the tasks for both prefill and decode requests tasks = [ - session.post(f"{prefill_server}/generate", json=modified_request), - session.post(f"{decode_server}/generate", json=modified_request), + session.post(f"{prefill_server}/{endpoint}", json=modified_request), + session.post(f"{decode_server}/{endpoint}", json=modified_request), ] # Wait for both responses to complete. Since this is streaming, they return immediately. prefill_response, decode_response = await asyncio.gather(*tasks)