From 4f24ab1718c13e68a1249decc6d3d4da465070b6 Mon Sep 17 00:00:00 2001 From: Simo Lin Date: Thu, 16 Oct 2025 14:19:55 -0700 Subject: [PATCH] [router][grpc] add disagg info to warm up in grpc server (#11727) --- python/sglang/srt/entrypoints/grpc_server.py | 40 ++++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/entrypoints/grpc_server.py b/python/sglang/srt/entrypoints/grpc_server.py index 9cec138f4..7bc59dfb9 100644 --- a/python/sglang/srt/entrypoints/grpc_server.py +++ b/python/sglang/srt/entrypoints/grpc_server.py @@ -532,9 +532,20 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer) bootstrap_port = None bootstrap_room = None if grpc_req.HasField("disaggregated_params"): - bootstrap_host = grpc_req.disaggregated_params.bootstrap_host or None - bootstrap_port = grpc_req.disaggregated_params.bootstrap_port or None - bootstrap_room = grpc_req.disaggregated_params.bootstrap_room or None + # Falsy host/port (empty string, 0) become None; bootstrap_room is kept as-is since 0 is a valid room + bootstrap_host = ( + grpc_req.disaggregated_params.bootstrap_host + if grpc_req.disaggregated_params.bootstrap_host + else None + ) + bootstrap_port = ( + grpc_req.disaggregated_params.bootstrap_port + if grpc_req.disaggregated_params.bootstrap_port + else None + ) + bootstrap_room = ( + grpc_req.disaggregated_params.bootstrap_room + ) # Can be 0, don't use 'or None' # Create request return TokenizedGenerateReqInput( @@ -988,9 +999,9 @@ def _execute_grpc_server_warmup( if is_generation: # Create tokenized input for warmup - warmup_request = sglang_scheduler_pb2.GenerateRequest( - request_id=f"WARMUP_{time.time()}", - tokenized=sglang_scheduler_pb2.TokenizedInput( + warmup_request_kwargs = { + "request_id": f"WARMUP_{time.time()}", + "tokenized": sglang_scheduler_pb2.TokenizedInput( input_ids=[ 954, 15541, @@ -1002,11 +1013,24 @@ def _execute_grpc_server_warmup( ], # Simple token sequence original_text="The capital city of France is", ), - 
sampling_params=sglang_scheduler_pb2.SamplingParams( + "sampling_params": sglang_scheduler_pb2.SamplingParams( temperature=0.0, max_new_tokens=max_new_tokens, ), - stream=False, + "stream": False, + } + + # Set disaggregation params if needed + if server_args.disaggregation_mode != DisaggregationMode.NULL: + warmup_request_kwargs["disaggregated_params"] = ( + sglang_scheduler_pb2.DisaggregatedParams( + bootstrap_host=FAKE_BOOTSTRAP_HOST, + bootstrap_room=0, + ) + ) + + warmup_request = sglang_scheduler_pb2.GenerateRequest( + **warmup_request_kwargs ) # Send the warmup request