[router][grpc] Cleanup debug logs in grpc_server and grpc_router (#11340)
This commit is contained in:
@@ -397,9 +397,7 @@ class GrpcRequestManager:
|
||||
# Wait for result in background
|
||||
async def wait_for_result():
|
||||
try:
|
||||
# Wait for completion
|
||||
await state.event.wait()
|
||||
# Get result from queue
|
||||
result = await state.out_queue.get()
|
||||
future.set_result(result)
|
||||
except Exception as e:
|
||||
@@ -437,19 +435,6 @@ class GrpcRequestManager:
|
||||
|
||||
return True
|
||||
|
||||
async def pause_generation(self):
|
||||
"""Pause generation processing."""
|
||||
async with self.is_pause_cond:
|
||||
self.is_pause = True
|
||||
logger.info("Generation paused")
|
||||
|
||||
async def resume_generation(self):
|
||||
"""Resume generation processing."""
|
||||
async with self.is_pause_cond:
|
||||
self.is_pause = False
|
||||
self.is_pause_cond.notify_all()
|
||||
logger.info("Generation resumed")
|
||||
|
||||
async def handle_loop(self):
|
||||
"""
|
||||
Main event loop - processes outputs from scheduler.
|
||||
|
||||
@@ -189,7 +189,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
# Start the request manager's event loop using auto_create_handle_loop
|
||||
self.request_manager.auto_create_handle_loop()
|
||||
|
||||
logger.info("Standalone gRPC scheduler service initialized")
|
||||
logger.info("gRPC scheduler servicer initialized")
|
||||
|
||||
async def Generate(
|
||||
self,
|
||||
@@ -197,7 +197,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
context: grpc.aio.ServicerContext,
|
||||
) -> AsyncIterator[sglang_scheduler_pb2.GenerateResponse]:
|
||||
"""Handle generation requests with streaming responses."""
|
||||
logger.info(f"Generation request: {request.request_id}")
|
||||
logger.debug(f"Receive generation request: {request.request_id}")
|
||||
|
||||
try:
|
||||
# Convert gRPC request to internal format
|
||||
@@ -249,7 +249,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
yield self._create_chunk_response(request.request_id, output)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Generate failed: {e}\n{get_exception_traceback()}")
|
||||
logger.error(
|
||||
f"Generate failed for request {request.request_id}: {e}\n"
|
||||
f"{get_exception_traceback()}"
|
||||
)
|
||||
yield sglang_scheduler_pb2.GenerateResponse(
|
||||
request_id=request.request_id,
|
||||
error=sglang_scheduler_pb2.GenerateError(
|
||||
@@ -262,10 +265,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
async def Embed(
|
||||
self,
|
||||
request: sglang_scheduler_pb2.EmbedRequest,
|
||||
context: grpc.aio.ServicerContext,
|
||||
_context: grpc.aio.ServicerContext,
|
||||
) -> sglang_scheduler_pb2.EmbedResponse:
|
||||
"""Handle embedding requests."""
|
||||
logger.info(f"Embedding request: {request.request_id}")
|
||||
logger.debug(f"Receive embedding request: {request.request_id}")
|
||||
|
||||
try:
|
||||
# Convert request
|
||||
@@ -292,7 +295,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Embed failed: {e}\n{get_exception_traceback()}")
|
||||
logger.error(
|
||||
f"Embed failed for request {request.request_id}: {e}\n"
|
||||
f"{get_exception_traceback()}"
|
||||
)
|
||||
return sglang_scheduler_pb2.EmbedResponse(
|
||||
request_id=request.request_id,
|
||||
error=sglang_scheduler_pb2.EmbedError(
|
||||
@@ -344,7 +350,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
health_request.bootstrap_host = FAKE_BOOTSTRAP_HOST
|
||||
health_request.bootstrap_room = 0
|
||||
|
||||
logger.info(f"Sending health check request to request manager...")
|
||||
logger.debug(f"Receive health check request: {rid}")
|
||||
|
||||
# Submit and wait for response
|
||||
output_generator = self.request_manager.generate_request(
|
||||
@@ -375,7 +381,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Health check failed: {e}")
|
||||
logger.error(f"Health check failed: {e}\n{get_exception_traceback()}")
|
||||
return sglang_scheduler_pb2.HealthCheckResponse(
|
||||
healthy=False, message=f"Health check error: {str(e)}"
|
||||
)
|
||||
@@ -383,10 +389,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
async def Abort(
|
||||
self,
|
||||
request: sglang_scheduler_pb2.AbortRequest,
|
||||
context: grpc.aio.ServicerContext,
|
||||
_context: grpc.aio.ServicerContext,
|
||||
) -> sglang_scheduler_pb2.AbortResponse:
|
||||
"""Abort an ongoing request."""
|
||||
logger.info(f"Aborting request: {request.request_id}")
|
||||
logger.debug(f"Receive abort request: {request.request_id}")
|
||||
|
||||
try:
|
||||
success = await self.request_manager.abort_request(request.request_id)
|
||||
@@ -396,7 +402,10 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
message=f"Request {request.request_id} {'aborted' if success else 'not found'}",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Abort failed: {e}")
|
||||
logger.error(
|
||||
f"Abort failed for request {request.request_id}: {e}\n"
|
||||
f"{get_exception_traceback()}"
|
||||
)
|
||||
return sglang_scheduler_pb2.AbortResponse(
|
||||
success=False,
|
||||
message=str(e),
|
||||
@@ -404,11 +413,11 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
|
||||
async def GetModelInfo(
|
||||
self,
|
||||
request: sglang_scheduler_pb2.GetModelInfoRequest,
|
||||
context: grpc.aio.ServicerContext,
|
||||
_request: sglang_scheduler_pb2.GetModelInfoRequest,
|
||||
_context: grpc.aio.ServicerContext,
|
||||
) -> sglang_scheduler_pb2.GetModelInfoResponse:
|
||||
"""Get model information."""
|
||||
logger.info("Model info request received")
|
||||
logger.debug("Receive model info request")
|
||||
|
||||
is_generation = self.scheduler_info.get("is_generation")
|
||||
if is_generation is None:
|
||||
@@ -435,11 +444,11 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
|
||||
async def GetServerInfo(
|
||||
self,
|
||||
request: sglang_scheduler_pb2.GetServerInfoRequest,
|
||||
context: grpc.aio.ServicerContext,
|
||||
_request: sglang_scheduler_pb2.GetServerInfoRequest,
|
||||
_context: grpc.aio.ServicerContext,
|
||||
) -> sglang_scheduler_pb2.GetServerInfoResponse:
|
||||
"""Get server information."""
|
||||
logger.info("Server info request received")
|
||||
logger.debug("Receive server info request")
|
||||
|
||||
server_args_dict = dataclasses.asdict(self.server_args)
|
||||
server_args_struct = Struct()
|
||||
@@ -861,9 +870,8 @@ async def serve_grpc(
|
||||
listen_addr = f"{server_args.host}:{server_args.port}"
|
||||
server.add_insecure_port(listen_addr)
|
||||
|
||||
logger.info(f"Starting standalone gRPC server on {listen_addr}")
|
||||
|
||||
await server.start()
|
||||
logger.info(f"gRPC server listening on {listen_addr}")
|
||||
|
||||
# Handle shutdown signals
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
Reference in New Issue
Block a user