[router] fix get load response parsing (#11213)

This commit is contained in:
Simo Lin
2025-10-04 09:58:02 -04:00
committed by GitHub
parent 666da3d59f
commit ffd03a9bd3
2 changed files with 26 additions and 5 deletions

View File

@@ -141,7 +141,17 @@ def create_app(args: argparse.Namespace) -> FastAPI:
@app.get("/get_load")
async def get_load(request: Request):
    """Report the current load as a JSON array of per-DP-rank records.

    The response body is a list holding a single record for ``dp_rank`` 0.
    Both ``num_reqs`` and ``num_tokens`` carry the module's ``_inflight``
    counter, and ``num_waiting_reqs`` is always 0.

    Args:
        request: Incoming HTTP request, passed to ``check_api_key`` before
            any payload is built (presumably rejecting unauthorized
            callers -- confirm against that helper).

    Returns:
        A ``JSONResponse`` wrapping the one-element load list.
    """
    check_api_key(request)
    # Return format matching real workers: array of load info per DP rank.
    # This must be the only return in the handler: an earlier
    # ``{"load": ...}`` return would leave the array payload unreachable.
    return JSONResponse(
        [
            {
                "dp_rank": 0,
                "num_reqs": _inflight,
                "num_waiting_reqs": 0,
                "num_tokens": _inflight,
            }
        ]
    )
def make_json_response(obj: dict, status_code: int = 200) -> JSONResponse:
resp = JSONResponse(obj, status_code=status_code)