[bugfix] Fix sgl-router get_server_info endpoint compatibility issue (#7813)
This commit is contained in:
@@ -920,20 +920,14 @@ impl PDRouter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_server_info(&self, client: &reqwest::Client) -> HttpResponse {
|
pub async fn get_server_info(&self, client: &reqwest::Client) -> HttpResponse {
|
||||||
// Get info from all decode servers (where generation happens)
|
// Get info from the first decode server to match sglang's server info format
|
||||||
let mut all_internal_states = Vec::new();
|
let first_decode_url = if let Ok(workers) = self.decode_workers.read() {
|
||||||
let mut decode_infos = Vec::new();
|
workers.first().map(|w| w.url.clone())
|
||||||
|
} else {
|
||||||
|
return HttpResponse::InternalServerError().body("Failed to access decode workers");
|
||||||
|
};
|
||||||
|
|
||||||
// Clone URLs to avoid holding lock across await
|
if let Some(worker_url) = first_decode_url {
|
||||||
let worker_urls: Vec<String> = self
|
|
||||||
.decode_workers
|
|
||||||
.read()
|
|
||||||
.unwrap()
|
|
||||||
.iter()
|
|
||||||
.map(|w| w.url.clone())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
for worker_url in worker_urls {
|
|
||||||
match client
|
match client
|
||||||
.get(format!("{}/get_server_info", worker_url))
|
.get(format!("{}/get_server_info", worker_url))
|
||||||
.send()
|
.send()
|
||||||
@@ -942,38 +936,31 @@ impl PDRouter {
|
|||||||
Ok(res) if res.status().is_success() => {
|
Ok(res) if res.status().is_success() => {
|
||||||
match res.json::<Value>().await {
|
match res.json::<Value>().await {
|
||||||
Ok(info) => {
|
Ok(info) => {
|
||||||
// Extract internal_states from each decode server
|
// The decode server should already return the proper format
|
||||||
if let Some(states) = info.get("internal_states") {
|
// with tokenizer_path and other fields that bench_one_batch_server.py expects
|
||||||
if let Some(states_array) = states.as_array() {
|
HttpResponse::Ok().json(info)
|
||||||
all_internal_states.extend(states_array.clone());
|
}
|
||||||
}
|
Err(e) => {
|
||||||
}
|
error!("Failed to parse server info: {}", e);
|
||||||
decode_infos.push(info);
|
HttpResponse::InternalServerError()
|
||||||
|
.body(format!("Failed to parse server info: {}", e))
|
||||||
}
|
}
|
||||||
Err(e) => error!("Failed to parse server info: {}", e),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
Ok(res) => {
|
||||||
|
let status = actix_web::http::StatusCode::from_u16(res.status().as_u16())
|
||||||
|
.unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
|
||||||
|
HttpResponse::build(status)
|
||||||
|
.body(format!("Decode server returned status: {}", res.status()))
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Failed to get server info: {}", e);
|
||||||
|
HttpResponse::InternalServerError()
|
||||||
|
.body(format!("Failed to get server info: {}", e))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// If we have internal states, return in the format expected by bench_one_batch_server.py
|
|
||||||
if !all_internal_states.is_empty() {
|
|
||||||
// Use the first decode server's internal state (they should all be similar)
|
|
||||||
HttpResponse::Ok().json(serde_json::json!({
|
|
||||||
"internal_states": all_internal_states,
|
|
||||||
// Include original format for compatibility
|
|
||||||
"decode_servers": decode_infos,
|
|
||||||
}))
|
|
||||||
} else {
|
} else {
|
||||||
// Fallback: create a dummy internal_states entry
|
HttpResponse::ServiceUnavailable().body("No decode servers available")
|
||||||
HttpResponse::Ok().json(serde_json::json!({
|
|
||||||
"internal_states": [{
|
|
||||||
"last_gen_throughput": 0.0,
|
|
||||||
"avg_spec_accept_length": null,
|
|
||||||
}],
|
|
||||||
"decode_servers": decode_infos,
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user