From 6242c399abb7582fb3d9a4e6a11f6af7d248841b Mon Sep 17 00:00:00 2001
From: Lucien
Date: Wed, 21 Aug 2024 01:14:34 +0800
Subject: [PATCH] Generate 1 token to verify the health of the inference service in /health (#1154)

Co-authored-by: Yineng Zhang
---
 python/sglang/srt/server.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 997b805cc..55271c235 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -89,6 +89,40 @@ app = FastAPI()
 tokenizer_manager = None
 
 
+@app.get("/v1/health")
+async def health(request: Request) -> Response:
+    """Generate 1 token to verify the health of the inference service.
+
+    Sends a minimal generation request (prompt "s", at most one new token)
+    through ``tokenizer_manager.generate_request`` and stops iterating after
+    the first item it yields.
+
+    NOTE(review): this handler has the same name as the ``/health`` handler
+    defined directly below, so the module-level name ``health`` ends up
+    bound to the later function. Consider giving this one a distinct name.
+
+    Args:
+        request: The incoming HTTP request; forwarded to
+            ``tokenizer_manager.generate_request``.
+
+    Returns:
+        An empty ``Response`` with status 200 when the generation call
+        completes without raising, or status 503 when any exception occurs.
+    """
+    gri = GenerateReqInput(
+        text="s", sampling_params={"max_new_tokens": 1, "temperature": 0.7}
+    )
+    try:
+        # Only the first yielded item is needed; stop iterating right away.
+        async for _ in tokenizer_manager.generate_request(gri, request):
+            break
+    except Exception as e:
+        # Log with traceback and report the service as unavailable.
+        logger.exception(e)
+        return Response(status_code=503)
+    return Response(status_code=200)
+
+
 @app.get("/health")
 async def health() -> Response:
     """Health check."""