[router][grpc] Cleanup debug logs in grpc_server and grpc_router (#11340)

This commit is contained in:
Chang Su
2025-10-08 13:26:19 -07:00
committed by GitHub
parent 677aa0e25f
commit a65ca73911
5 changed files with 31 additions and 102 deletions

View File

@@ -397,9 +397,7 @@ class GrpcRequestManager:
# Wait for result in background
async def wait_for_result():
try:
# Wait for completion
await state.event.wait()
# Get result from queue
result = await state.out_queue.get()
future.set_result(result)
except Exception as e:
@@ -437,19 +435,6 @@ class GrpcRequestManager:
return True
async def pause_generation(self):
"""Pause generation processing.

Enters the ``self.is_pause_cond`` async context, sets ``self.is_pause`` to
``True``, and logs "Generation paused" at INFO level. Takes no arguments
and returns ``None``.
"""
# NOTE(review): indentation is stripped in this diff rendering, so only the
# first statement after ``async with`` is certain to be inside its body;
# presumably the log call is too -- confirm against the real file.
# NOTE(review): the hunk header (-437,19 +435,6) suggests this method is
# among the lines this commit removes -- confirm before relying on it.
async with self.is_pause_cond:
# Flip the pause flag while the condition is held.
self.is_pause = True
logger.info("Generation paused")
async def resume_generation(self):
"""Resume generation processing.

Enters the ``self.is_pause_cond`` async context, sets ``self.is_pause`` to
``False``, calls ``self.is_pause_cond.notify_all()``, and logs
"Generation resumed" at INFO level. Takes no arguments and returns ``None``.
"""
# NOTE(review): indentation is stripped in this diff rendering; presumably
# ``notify_all()`` runs inside the ``async with`` body (an asyncio.Condition
# requires the lock to be held when notifying) -- confirm against the real file.
# NOTE(review): the hunk header (-437,19 +435,6) suggests this method is
# among the lines this commit removes -- confirm before relying on it.
async with self.is_pause_cond:
# Clear the pause flag, then signal whoever is waiting on the condition.
self.is_pause = False
self.is_pause_cond.notify_all()
logger.info("Generation resumed")
async def handle_loop(self):
"""
Main event loop - processes outputs from scheduler.

View File

@@ -189,7 +189,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
# Start the request manager's event loop using auto_create_handle_loop
self.request_manager.auto_create_handle_loop()
logger.info("Standalone gRPC scheduler service initialized")
logger.info("gRPC scheduler servicer initialized")
async def Generate(
self,
@@ -197,7 +197,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
context: grpc.aio.ServicerContext,
) -> AsyncIterator[sglang_scheduler_pb2.GenerateResponse]:
"""Handle generation requests with streaming responses."""
logger.info(f"Generation request: {request.request_id}")
logger.debug(f"Receive generation request: {request.request_id}")
try:
# Convert gRPC request to internal format
@@ -249,7 +249,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
yield self._create_chunk_response(request.request_id, output)
except Exception as e:
logger.error(f"Generate failed: {e}\n{get_exception_traceback()}")
logger.error(
f"Generate failed for request {request.request_id}: {e}\n"
f"{get_exception_traceback()}"
)
yield sglang_scheduler_pb2.GenerateResponse(
request_id=request.request_id,
error=sglang_scheduler_pb2.GenerateError(
@@ -262,10 +265,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
async def Embed(
self,
request: sglang_scheduler_pb2.EmbedRequest,
context: grpc.aio.ServicerContext,
_context: grpc.aio.ServicerContext,
) -> sglang_scheduler_pb2.EmbedResponse:
"""Handle embedding requests."""
logger.info(f"Embedding request: {request.request_id}")
logger.debug(f"Receive embedding request: {request.request_id}")
try:
# Convert request
@@ -292,7 +295,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
)
except Exception as e:
logger.error(f"Embed failed: {e}\n{get_exception_traceback()}")
logger.error(
f"Embed failed for request {request.request_id}: {e}\n"
f"{get_exception_traceback()}"
)
return sglang_scheduler_pb2.EmbedResponse(
request_id=request.request_id,
error=sglang_scheduler_pb2.EmbedError(
@@ -344,7 +350,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
health_request.bootstrap_host = FAKE_BOOTSTRAP_HOST
health_request.bootstrap_room = 0
logger.info(f"Sending health check request to request manager...")
logger.debug(f"Receive health check request: {rid}")
# Submit and wait for response
output_generator = self.request_manager.generate_request(
@@ -375,7 +381,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
)
except Exception as e:
logger.error(f"Health check failed: {e}")
logger.error(f"Health check failed: {e}\n{get_exception_traceback()}")
return sglang_scheduler_pb2.HealthCheckResponse(
healthy=False, message=f"Health check error: {str(e)}"
)
@@ -383,10 +389,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
async def Abort(
self,
request: sglang_scheduler_pb2.AbortRequest,
context: grpc.aio.ServicerContext,
_context: grpc.aio.ServicerContext,
) -> sglang_scheduler_pb2.AbortResponse:
"""Abort an ongoing request."""
logger.info(f"Aborting request: {request.request_id}")
logger.debug(f"Receive abort request: {request.request_id}")
try:
success = await self.request_manager.abort_request(request.request_id)
@@ -396,7 +402,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
message=f"Request {request.request_id} {'aborted' if success else 'not found'}",
)
except Exception as e:
logger.error(f"Abort failed: {e}")
logger.error(
f"Abort failed for request {request.request_id}: {e}\n"
f"{get_exception_traceback()}"
)
return sglang_scheduler_pb2.AbortResponse(
success=False,
message=str(e),
@@ -404,11 +413,11 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
async def GetModelInfo(
self,
request: sglang_scheduler_pb2.GetModelInfoRequest,
context: grpc.aio.ServicerContext,
_request: sglang_scheduler_pb2.GetModelInfoRequest,
_context: grpc.aio.ServicerContext,
) -> sglang_scheduler_pb2.GetModelInfoResponse:
"""Get model information."""
logger.info("Model info request received")
logger.debug("Receive model info request")
is_generation = self.scheduler_info.get("is_generation")
if is_generation is None:
@@ -435,11 +444,11 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
async def GetServerInfo(
self,
request: sglang_scheduler_pb2.GetServerInfoRequest,
context: grpc.aio.ServicerContext,
_request: sglang_scheduler_pb2.GetServerInfoRequest,
_context: grpc.aio.ServicerContext,
) -> sglang_scheduler_pb2.GetServerInfoResponse:
"""Get server information."""
logger.info("Server info request received")
logger.debug("Receive server info request")
server_args_dict = dataclasses.asdict(self.server_args)
server_args_struct = Struct()
@@ -861,9 +870,8 @@ async def serve_grpc(
listen_addr = f"{server_args.host}:{server_args.port}"
server.add_insecure_port(listen_addr)
logger.info(f"Starting standalone gRPC server on {listen_addr}")
await server.start()
logger.info(f"gRPC server listening on {listen_addr}")
# Handle shutdown signals
loop = asyncio.get_running_loop()