# Excerpt of python/sglang/srt/server.py, reconstructed from the post-change
# side of diff 997b805cc..55271c235 (hunk @@ -89,6 +89,23 @@).
app = FastAPI()
tokenizer_manager = None


# NOTE(review): this handler has the same function name as the "/health"
# handler defined right below it, so the module-level name `health` ends up
# bound to the later definition. Each route is attached by its own decorator
# call -- confirm nothing looks these handlers up by name.
@app.get("/v1/health")
async def health(request: Request) -> Response:
    """Probe the inference service by asking it to generate a single token.

    Answers with HTTP 200 once the generation stream yields its first item
    (or finishes without raising). If requesting or iterating the stream
    raises, the exception is logged and HTTP 503 is returned instead.
    """
    probe = GenerateReqInput(
        text="s",
        sampling_params={"max_new_tokens": 1, "temperature": 0.7},
    )
    try:
        stream = tokenizer_manager.generate_request(probe, request)
        async for _first in stream:
            # The first item is all the check needs; stop consuming here.
            break
        return Response(status_code=200)
    except Exception as err:
        logger.exception(err)
        return Response(status_code=503)


# Only the head of this handler is visible in the excerpt; the remainder of
# its body lies past the end of the diff hunk.
@app.get("/health")
async def health() -> Response:
    """Health check."""