Generate 1 token to verify the health of the inference service in /health (#1154)
Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
@@ -89,6 +89,23 @@ app = FastAPI()
|
||||
# Module-level handle that the route handlers call into; it starts out as None.
# NOTE(review): where it is assigned a real manager is not visible here — confirm.
tokenizer_manager = None
|
||||
|
||||
|
||||
@app.get("/v1/health")
async def health(request: Request) -> Response:
    """Probe the inference service by running a one-token generation.

    Builds a minimal generation request and submits it through the
    module-level ``tokenizer_manager``. Responds with an empty 200 once the
    request stream has produced its first item (or finished without error),
    and with an empty 503 after logging the exception if anything raises
    while submitting or running the request.
    """
    probe_params = {"max_new_tokens": 1, "temperature": 0.7}
    probe = GenerateReqInput(text="s", sampling_params=probe_params)

    try:
        # Only the first yielded item matters: it shows the pipeline responds.
        async for _ in tokenizer_manager.generate_request(probe, request):
            break
        status = 200
    except Exception as exc:
        logger.exception(exc)
        status = 503

    return Response(status_code=status)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health() -> Response:
|
||||
"""Health check."""
|
||||
|
||||
Reference in New Issue
Block a user