[router] fix get load response parsing (#11213)
This commit is contained in:
@@ -141,7 +141,17 @@ def create_app(args: argparse.Namespace) -> FastAPI:
|
|||||||
@app.get("/get_load")
|
@app.get("/get_load")
|
||||||
async def get_load(request: Request):
|
async def get_load(request: Request):
|
||||||
check_api_key(request)
|
check_api_key(request)
|
||||||
return JSONResponse({"load": _inflight})
|
# Return format matching real workers: array of load info per DP rank
|
||||||
|
return JSONResponse(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"dp_rank": 0,
|
||||||
|
"num_reqs": _inflight,
|
||||||
|
"num_waiting_reqs": 0,
|
||||||
|
"num_tokens": _inflight,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
def make_json_response(obj: dict, status_code: int = 200) -> JSONResponse:
|
def make_json_response(obj: dict, status_code: int = 200) -> JSONResponse:
|
||||||
resp = JSONResponse(obj, status_code=status_code)
|
resp = JSONResponse(obj, status_code=status_code)
|
||||||
|
|||||||
@@ -1252,11 +1252,22 @@ impl WorkerManager {
|
|||||||
Ok(response) if response.status().is_success() => {
|
Ok(response) if response.status().is_success() => {
|
||||||
match response.json::<Value>().await {
|
match response.json::<Value>().await {
|
||||||
Ok(json) => {
|
Ok(json) => {
|
||||||
if let Some(load) = json.get("load").and_then(|v| v.as_i64()) {
|
// The /get_load endpoint returns an array of load info objects (one per DP rank)
|
||||||
debug!("Worker {} load: {}", url, load);
|
// Each object has: {dp_rank, num_reqs, num_waiting_reqs, num_tokens}
|
||||||
Some(load as isize)
|
if let Some(array) = json.as_array() {
|
||||||
|
let total_tokens: i64 = array
|
||||||
|
.iter()
|
||||||
|
.filter_map(|entry| {
|
||||||
|
entry.get("num_tokens").and_then(|v| v.as_i64())
|
||||||
|
})
|
||||||
|
.sum();
|
||||||
|
debug!("Worker {} load (total tokens): {}", url, total_tokens);
|
||||||
|
Some(total_tokens as isize)
|
||||||
} else {
|
} else {
|
||||||
warn!("Invalid load response from {}: {:?}", url, json);
|
warn!(
|
||||||
|
"Invalid load response from {}: expected array, got {:?}",
|
||||||
|
url, json
|
||||||
|
);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user