[router][grpc] add disagg info to warm up in grpc server (#11727)
This commit is contained in:
@@ -532,9 +532,20 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
|||||||
bootstrap_port = None
|
bootstrap_port = None
|
||||||
bootstrap_room = None
|
bootstrap_room = None
|
||||||
if grpc_req.HasField("disaggregated_params"):
|
if grpc_req.HasField("disaggregated_params"):
|
||||||
bootstrap_host = grpc_req.disaggregated_params.bootstrap_host or None
|
# Don't use 'or None' as it treats 0 as falsy
|
||||||
bootstrap_port = grpc_req.disaggregated_params.bootstrap_port or None
|
bootstrap_host = (
|
||||||
bootstrap_room = grpc_req.disaggregated_params.bootstrap_room or None
|
grpc_req.disaggregated_params.bootstrap_host
|
||||||
|
if grpc_req.disaggregated_params.bootstrap_host
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
bootstrap_port = (
|
||||||
|
grpc_req.disaggregated_params.bootstrap_port
|
||||||
|
if grpc_req.disaggregated_params.bootstrap_port
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
bootstrap_room = (
|
||||||
|
grpc_req.disaggregated_params.bootstrap_room
|
||||||
|
) # Can be 0, don't use 'or None'
|
||||||
|
|
||||||
# Create request
|
# Create request
|
||||||
return TokenizedGenerateReqInput(
|
return TokenizedGenerateReqInput(
|
||||||
@@ -988,9 +999,9 @@ def _execute_grpc_server_warmup(
|
|||||||
|
|
||||||
if is_generation:
|
if is_generation:
|
||||||
# Create tokenized input for warmup
|
# Create tokenized input for warmup
|
||||||
warmup_request = sglang_scheduler_pb2.GenerateRequest(
|
warmup_request_kwargs = {
|
||||||
request_id=f"WARMUP_{time.time()}",
|
"request_id": f"WARMUP_{time.time()}",
|
||||||
tokenized=sglang_scheduler_pb2.TokenizedInput(
|
"tokenized": sglang_scheduler_pb2.TokenizedInput(
|
||||||
input_ids=[
|
input_ids=[
|
||||||
954,
|
954,
|
||||||
15541,
|
15541,
|
||||||
@@ -1002,11 +1013,24 @@ def _execute_grpc_server_warmup(
|
|||||||
], # Simple token sequence
|
], # Simple token sequence
|
||||||
original_text="The capital city of France is",
|
original_text="The capital city of France is",
|
||||||
),
|
),
|
||||||
sampling_params=sglang_scheduler_pb2.SamplingParams(
|
"sampling_params": sglang_scheduler_pb2.SamplingParams(
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
max_new_tokens=max_new_tokens,
|
max_new_tokens=max_new_tokens,
|
||||||
),
|
),
|
||||||
stream=False,
|
"stream": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Set disaggregation params if needed
|
||||||
|
if server_args.disaggregation_mode != DisaggregationMode.NULL:
|
||||||
|
warmup_request_kwargs["disaggregated_params"] = (
|
||||||
|
sglang_scheduler_pb2.DisaggregatedParams(
|
||||||
|
bootstrap_host=FAKE_BOOTSTRAP_HOST,
|
||||||
|
bootstrap_room=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
warmup_request = sglang_scheduler_pb2.GenerateRequest(
|
||||||
|
**warmup_request_kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
# Send the warmup request
|
# Send the warmup request
|
||||||
|
|||||||
Reference in New Issue
Block a user