Generate 1 token to verify the health of the inference service in /health (#1154)
Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
@@ -89,6 +89,23 @@ app = FastAPI()
|
|||||||
tokenizer_manager = None
|
tokenizer_manager = None
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v1/health")
async def health(request: Request) -> Response:
    """
    Probe the inference service by running a one-token generation.

    A minimal request (prompt ``"s"``, at most one new token) is passed to
    ``tokenizer_manager.generate_request``. Iteration stops after the first
    item it yields; the item itself is ignored.

    Returns:
        A bare ``Response`` with status 200 when the probe completes without
        raising, or status 503 when any exception is raised (the exception
        is logged first).
    """
    sampling_params = {"max_new_tokens": 1, "temperature": 0.7}
    probe = GenerateReqInput(text="s", sampling_params=sampling_params)

    try:
        outputs = tokenizer_manager.generate_request(probe, request)
        async for _ in outputs:
            # One yielded item is enough; the content is not inspected.
            break
        return Response(status_code=200)
    except Exception as e:
        logger.exception(e)
        return Response(status_code=503)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health() -> Response:
|
async def health() -> Response:
|
||||||
"""Health check."""
|
"""Health check."""
|
||||||
|
|||||||
Reference in New Issue
Block a user