From fe43e889f8979ade3d9bcf1799bee1d7a0071f0a Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Wed, 20 Aug 2025 11:15:16 +0800 Subject: [PATCH] Fix mini lb timeout issue (#9369) --- python/sglang/srt/disaggregation/launch_lb.py | 8 +++++++- python/sglang/srt/disaggregation/mini_lb.py | 16 +++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/python/sglang/srt/disaggregation/launch_lb.py b/python/sglang/srt/disaggregation/launch_lb.py index bc116fb55..faa52f873 100644 --- a/python/sglang/srt/disaggregation/launch_lb.py +++ b/python/sglang/srt/disaggregation/launch_lb.py @@ -118,7 +118,13 @@ def main(): lb_args = LBArgs.from_cli_args(args) prefill_configs = [PrefillConfig(url, port) for url, port in lb_args.prefill_infos] - run(prefill_configs, lb_args.decode_infos, lb_args.host, lb_args.port) + run( + prefill_configs, + lb_args.decode_infos, + lb_args.host, + lb_args.port, + lb_args.timeout, + ) if __name__ == "__main__": diff --git a/python/sglang/srt/disaggregation/mini_lb.py b/python/sglang/srt/disaggregation/mini_lb.py index a80407bca..ebca01f41 100644 --- a/python/sglang/srt/disaggregation/mini_lb.py +++ b/python/sglang/srt/disaggregation/mini_lb.py @@ -50,10 +50,16 @@ class PrefillConfig: class MiniLoadBalancer: - def __init__(self, prefill_configs: List[PrefillConfig], decode_servers: List[str]): + def __init__( + self, + prefill_configs: List[PrefillConfig], + decode_servers: List[str], + timeout: int, + ): self.prefill_configs = prefill_configs self.prefill_servers = [p.url for p in prefill_configs] self.decode_servers = decode_servers + self.timeout = timeout def add_prefill_server(self, new_prefill_config: PrefillConfig): self.prefill_configs.append(new_prefill_config) @@ -78,7 +84,7 @@ class MiniLoadBalancer: async with aiohttp.ClientSession( timeout=aiohttp.ClientTimeout( - total=3600 + total=self.timeout ) # Add timeout for request reliability ) as session: tasks = [ @@ -117,7 +123,7 @@ class MiniLoadBalancer: async def stream_results(): async with aiohttp.ClientSession( timeout=aiohttp.ClientTimeout( - total=3600 + total=self.timeout ) # Add timeout for request reliability ) as session: # Create the tasks for both prefill and decode requests @@ -401,9 +407,9 @@ async def register(obj: PDRegistryRequest): return Response(status_code=200) -def run(prefill_configs, decode_addrs, host, port): +def run(prefill_configs, decode_addrs, host, port, timeout): global load_balancer - load_balancer = MiniLoadBalancer(prefill_configs, decode_addrs) + load_balancer = MiniLoadBalancer(prefill_configs, decode_addrs, timeout=timeout) uvicorn.run(app, host=host, port=port)