[router][grpc] disable health check generation and increase timeout (#11353)
This commit is contained in:
@@ -313,78 +313,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
|||||||
request: sglang_scheduler_pb2.HealthCheckRequest,
|
request: sglang_scheduler_pb2.HealthCheckRequest,
|
||||||
context: grpc.aio.ServicerContext,
|
context: grpc.aio.ServicerContext,
|
||||||
) -> sglang_scheduler_pb2.HealthCheckResponse:
|
) -> sglang_scheduler_pb2.HealthCheckResponse:
|
||||||
"""Health check by generating from client input."""
|
"""Health check - always returns healthy after server started."""
|
||||||
try:
|
return sglang_scheduler_pb2.HealthCheckResponse(
|
||||||
# Check if request manager is shutting down
|
healthy=True, message="Health check passed"
|
||||||
if self.request_manager.gracefully_exit:
|
)
|
||||||
return sglang_scheduler_pb2.HealthCheckResponse(
|
|
||||||
healthy=False, message="Server shutting down"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract tokenized input from request
|
|
||||||
if not request.HasField("tokenized"):
|
|
||||||
return sglang_scheduler_pb2.HealthCheckResponse(
|
|
||||||
healthy=False, message="Tokenized input required for health check"
|
|
||||||
)
|
|
||||||
|
|
||||||
input_text = request.tokenized.original_text
|
|
||||||
input_ids = list(request.tokenized.input_ids)
|
|
||||||
|
|
||||||
# Create health check request
|
|
||||||
rid = f"HEALTH_CHECK_GRPC_{time.time()}"
|
|
||||||
|
|
||||||
health_request = TokenizedGenerateReqInput(
|
|
||||||
rid=rid,
|
|
||||||
input_text=input_text,
|
|
||||||
input_ids=input_ids,
|
|
||||||
sampling_params=SGLSamplingParams(max_new_tokens=1, temperature=0.0),
|
|
||||||
stream=False,
|
|
||||||
mm_inputs=None,
|
|
||||||
return_logprob=False,
|
|
||||||
logprob_start_len=-1,
|
|
||||||
top_logprobs_num=0,
|
|
||||||
token_ids_logprob=None,
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.server_args.disaggregation_mode != DisaggregationMode.NULL:
|
|
||||||
health_request.bootstrap_host = FAKE_BOOTSTRAP_HOST
|
|
||||||
health_request.bootstrap_room = 0
|
|
||||||
|
|
||||||
logger.debug(f"Receive health check request: {rid}")
|
|
||||||
|
|
||||||
# Submit and wait for response
|
|
||||||
output_generator = self.request_manager.generate_request(
|
|
||||||
health_request, request_id=rid
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Get first response with timeout
|
|
||||||
response = await asyncio.wait_for(
|
|
||||||
output_generator.__anext__(), timeout=HEALTH_CHECK_TIMEOUT
|
|
||||||
)
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
if rid in self.request_manager.rid_to_state:
|
|
||||||
del self.request_manager.rid_to_state[rid]
|
|
||||||
|
|
||||||
return sglang_scheduler_pb2.HealthCheckResponse(
|
|
||||||
healthy=True, message="Health check passed"
|
|
||||||
)
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
# Clean up on timeout
|
|
||||||
if rid in self.request_manager.rid_to_state:
|
|
||||||
del self.request_manager.rid_to_state[rid]
|
|
||||||
|
|
||||||
return sglang_scheduler_pb2.HealthCheckResponse(
|
|
||||||
healthy=False, message="Health check timeout"
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Health check failed: {e}\n{get_exception_traceback()}")
|
|
||||||
return sglang_scheduler_pb2.HealthCheckResponse(
|
|
||||||
healthy=False, message=f"Health check error: {str(e)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def Abort(
|
async def Abort(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ impl SglangSchedulerClient {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let channel = Channel::from_shared(http_endpoint)?
|
let channel = Channel::from_shared(http_endpoint)?
|
||||||
.timeout(Duration::from_secs(30))
|
.timeout(Duration::from_secs(3600))
|
||||||
.http2_keep_alive_interval(Duration::from_secs(30))
|
.http2_keep_alive_interval(Duration::from_secs(30))
|
||||||
.keep_alive_timeout(Duration::from_secs(10))
|
.keep_alive_timeout(Duration::from_secs(10))
|
||||||
.keep_alive_while_idle(true)
|
.keep_alive_while_idle(true)
|
||||||
|
|||||||
Reference in New Issue
Block a user