[router] improve router logs and request id header (#8415)
This commit is contained in:
@@ -5,6 +5,7 @@ use super::pd_types::{api_path, Bootstrap, ChatReqInput, GenerateReqInput, PDRou
|
||||
use super::request_adapter::ToPdRequest;
|
||||
use crate::core::{HealthChecker, Worker, WorkerFactory, WorkerLoadGuard};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::middleware::get_request_id;
|
||||
use crate::openai_api_types::{ChatCompletionRequest, CompletionRequest, GenerateRequest};
|
||||
use crate::policies::LoadBalancingPolicy;
|
||||
use crate::tree::Tree;
|
||||
@@ -16,7 +17,6 @@ use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
use std::time::{Duration, Instant};
|
||||
use tracing::{debug, error, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PDRouter {
|
||||
@@ -307,8 +307,8 @@ impl PDRouter {
|
||||
mut typed_req: GenerateReqInput,
|
||||
route: &str,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
let start = Instant::now();
|
||||
let _request_id = Uuid::new_v4();
|
||||
|
||||
// Get stream flag and return_logprob flag before moving the request
|
||||
let is_stream = typed_req.stream;
|
||||
@@ -328,7 +328,10 @@ impl PDRouter {
|
||||
let (prefill, decode) = match self.select_pd_pair(client, request_text).await {
|
||||
Ok(pair) => pair,
|
||||
Err(e) => {
|
||||
error!("Failed to select PD pair: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to select PD pair error={}", e
|
||||
);
|
||||
RouterMetrics::record_pd_error("server_selection");
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.body(format!("No available servers: {}", e));
|
||||
@@ -337,15 +340,17 @@ impl PDRouter {
|
||||
|
||||
// Log routing decision
|
||||
info!(
|
||||
"PD routing: {} -> prefill={}, decode={}",
|
||||
route,
|
||||
prefill.url(),
|
||||
decode.url()
|
||||
request_id = %request_id,
|
||||
"PD routing decision route={} prefill_url={} decode_url={}",
|
||||
route, prefill.url(), decode.url()
|
||||
);
|
||||
|
||||
// Add bootstrap info using the trait method
|
||||
if let Err(e) = typed_req.add_bootstrap_info(prefill.as_ref()) {
|
||||
error!("Failed to add bootstrap info: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to add bootstrap info error={}", e
|
||||
);
|
||||
RouterMetrics::record_pd_error("bootstrap_injection");
|
||||
return HttpResponse::InternalServerError()
|
||||
.body(format!("Bootstrap injection failed: {}", e));
|
||||
@@ -355,7 +360,10 @@ impl PDRouter {
|
||||
let json_with_bootstrap = match serde_json::to_value(&typed_req) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
error!("Failed to serialize request: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to serialize request error={}", e
|
||||
);
|
||||
return HttpResponse::InternalServerError().body("Failed to serialize request");
|
||||
}
|
||||
};
|
||||
@@ -383,6 +391,7 @@ impl PDRouter {
|
||||
mut typed_req: ChatReqInput,
|
||||
route: &str,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
let start = Instant::now();
|
||||
|
||||
// Get stream flag and return_logprob flag before moving the request
|
||||
@@ -406,7 +415,10 @@ impl PDRouter {
|
||||
let (prefill, decode) = match self.select_pd_pair(client, request_text).await {
|
||||
Ok(pair) => pair,
|
||||
Err(e) => {
|
||||
error!("Failed to select PD pair: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to select PD pair error={}", e
|
||||
);
|
||||
RouterMetrics::record_pd_error("server_selection");
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.body(format!("No available servers: {}", e));
|
||||
@@ -415,15 +427,17 @@ impl PDRouter {
|
||||
|
||||
// Log routing decision
|
||||
info!(
|
||||
"PD routing: {} -> prefill={}, decode={}",
|
||||
route,
|
||||
prefill.url(),
|
||||
decode.url()
|
||||
request_id = %request_id,
|
||||
"PD routing decision route={} prefill_url={} decode_url={}",
|
||||
route, prefill.url(), decode.url()
|
||||
);
|
||||
|
||||
// Add bootstrap info using the trait method
|
||||
if let Err(e) = typed_req.add_bootstrap_info(prefill.as_ref()) {
|
||||
error!("Failed to add bootstrap info: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to add bootstrap info error={}", e
|
||||
);
|
||||
RouterMetrics::record_pd_error("bootstrap_injection");
|
||||
return HttpResponse::InternalServerError()
|
||||
.body(format!("Bootstrap injection failed: {}", e));
|
||||
@@ -433,7 +447,10 @@ impl PDRouter {
|
||||
let json_with_bootstrap = match serde_json::to_value(&typed_req) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
error!("Failed to serialize request: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to serialize request error={}", e
|
||||
);
|
||||
return HttpResponse::InternalServerError().body("Failed to serialize request");
|
||||
}
|
||||
};
|
||||
@@ -461,6 +478,7 @@ impl PDRouter {
|
||||
mut typed_req: CompletionRequest,
|
||||
route: &str,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
let start = Instant::now();
|
||||
|
||||
// Get stream flag and return_logprob flag before moving the request
|
||||
@@ -477,7 +495,10 @@ impl PDRouter {
|
||||
let (prefill, decode) = match self.select_pd_pair(client, request_text).await {
|
||||
Ok(pair) => pair,
|
||||
Err(e) => {
|
||||
error!("Failed to select PD pair: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to select PD pair error={}", e
|
||||
);
|
||||
RouterMetrics::record_pd_error("server_selection");
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.body(format!("No available servers: {}", e));
|
||||
@@ -486,15 +507,17 @@ impl PDRouter {
|
||||
|
||||
// Log routing decision
|
||||
info!(
|
||||
"PD routing: {} -> prefill={}, decode={}",
|
||||
route,
|
||||
prefill.url(),
|
||||
decode.url()
|
||||
request_id = %request_id,
|
||||
"PD routing decision route={} prefill_url={} decode_url={}",
|
||||
route, prefill.url(), decode.url()
|
||||
);
|
||||
|
||||
// Add bootstrap info using the trait method
|
||||
if let Err(e) = typed_req.add_bootstrap_info(prefill.as_ref()) {
|
||||
error!("Failed to add bootstrap info: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to add bootstrap info error={}", e
|
||||
);
|
||||
RouterMetrics::record_pd_error("bootstrap_injection");
|
||||
return HttpResponse::InternalServerError()
|
||||
.body(format!("Bootstrap injection failed: {}", e));
|
||||
@@ -504,7 +527,10 @@ impl PDRouter {
|
||||
let json_with_bootstrap = match serde_json::to_value(&typed_req) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
error!("Failed to serialize request: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to serialize request error={}", e
|
||||
);
|
||||
return HttpResponse::InternalServerError().body("Failed to serialize request");
|
||||
}
|
||||
};
|
||||
@@ -538,6 +564,7 @@ impl PDRouter {
|
||||
return_logprob: bool,
|
||||
start_time: Instant,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
// Update load tracking for both workers
|
||||
let _guard = WorkerLoadGuard::new_multi(vec![prefill, decode]);
|
||||
|
||||
@@ -578,9 +605,9 @@ impl PDRouter {
|
||||
if !status.is_success() {
|
||||
RouterMetrics::record_pd_decode_error(decode.url());
|
||||
error!(
|
||||
"Decode server {} returned error status: {}",
|
||||
decode.url(),
|
||||
status
|
||||
request_id = %request_id,
|
||||
"Decode server returned error status decode_url={} status={}",
|
||||
decode.url(), status
|
||||
);
|
||||
|
||||
// Return the error response from decode server
|
||||
@@ -598,9 +625,9 @@ impl PDRouter {
|
||||
// Log prefill errors for debugging
|
||||
if let Err(e) = &prefill_result {
|
||||
error!(
|
||||
"Prefill server {} failed (non-critical): {}",
|
||||
prefill.url(),
|
||||
e
|
||||
request_id = %request_id,
|
||||
"Prefill server failed (non-critical) prefill_url={} error={}",
|
||||
prefill.url(), e
|
||||
);
|
||||
RouterMetrics::record_pd_prefill_error(prefill.url());
|
||||
}
|
||||
@@ -684,7 +711,12 @@ impl PDRouter {
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Decode request failed: {}", e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
decode_url = %decode.url(),
|
||||
error = %e,
|
||||
"Decode request failed"
|
||||
);
|
||||
RouterMetrics::record_pd_decode_error(decode.url());
|
||||
HttpResponse::BadGateway().body(format!("Decode server error: {}", e))
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::core::{HealthChecker, Worker, WorkerFactory};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::middleware::get_request_id;
|
||||
use crate::policies::LoadBalancingPolicy;
|
||||
use actix_web::http::header::{HeaderValue, CONTENT_TYPE};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
@@ -134,32 +135,26 @@ impl Router {
|
||||
match sync_client.get(&format!("{}/health", url)).send() {
|
||||
Ok(res) => {
|
||||
if !res.status().is_success() {
|
||||
let msg = format!(
|
||||
"Worker heatlh check is pending with status {}",
|
||||
res.status()
|
||||
);
|
||||
info!("{}", msg);
|
||||
all_healthy = false;
|
||||
unhealthy_workers.push((url, msg));
|
||||
unhealthy_workers.push((url, format!("status: {}", res.status())));
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
let msg = format!("Worker is not ready yet");
|
||||
info!("{}", msg);
|
||||
all_healthy = false;
|
||||
unhealthy_workers.push((url, msg));
|
||||
unhealthy_workers.push((url, "not ready".to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if all_healthy {
|
||||
info!("All workers are healthy");
|
||||
info!("All {} workers are healthy", worker_urls.len());
|
||||
return Ok(());
|
||||
} else {
|
||||
info!("Initializing workers:");
|
||||
for (url, reason) in &unhealthy_workers {
|
||||
info!(" {} - {}", url, reason);
|
||||
}
|
||||
debug!(
|
||||
"Waiting for {} workers to become healthy ({} unhealthy)",
|
||||
worker_urls.len(),
|
||||
unhealthy_workers.len()
|
||||
);
|
||||
thread::sleep(Duration::from_secs(interval_secs));
|
||||
}
|
||||
}
|
||||
@@ -181,6 +176,7 @@ impl Router {
|
||||
route: &str,
|
||||
req: &HttpRequest,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
let start = Instant::now();
|
||||
let mut request_builder = client.get(format!("{}{}", worker_url, route));
|
||||
|
||||
@@ -202,14 +198,32 @@ impl Router {
|
||||
|
||||
match res.bytes().await {
|
||||
Ok(body) => HttpResponse::build(status).body(body.to_vec()),
|
||||
Err(e) => HttpResponse::InternalServerError()
|
||||
.body(format!("Failed to read response body: {}", e)),
|
||||
Err(e) => {
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
worker_url = %worker_url,
|
||||
route = %route,
|
||||
error = %e,
|
||||
"Failed to read response body"
|
||||
);
|
||||
HttpResponse::InternalServerError()
|
||||
.body(format!("Failed to read response body: {}", e))
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => HttpResponse::InternalServerError().body(format!(
|
||||
"Failed to send request to worker {}: {}",
|
||||
worker_url, e
|
||||
)),
|
||||
Err(e) => {
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
worker_url = %worker_url,
|
||||
route = %route,
|
||||
error = %e,
|
||||
"Failed to send request to worker"
|
||||
);
|
||||
HttpResponse::InternalServerError().body(format!(
|
||||
"Failed to send request to worker {}: {}",
|
||||
worker_url, e
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
// Record request metrics
|
||||
@@ -231,6 +245,7 @@ impl Router {
|
||||
route: &str,
|
||||
req: &HttpRequest,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
const MAX_REQUEST_RETRIES: u32 = 3;
|
||||
const MAX_TOTAL_RETRIES: u32 = 6;
|
||||
let mut total_retries = 0;
|
||||
@@ -260,17 +275,23 @@ impl Router {
|
||||
}
|
||||
|
||||
warn!(
|
||||
"Request to {} failed (attempt {}/{})",
|
||||
worker_url,
|
||||
request_retries + 1,
|
||||
MAX_REQUEST_RETRIES
|
||||
request_id = %request_id,
|
||||
route = %route,
|
||||
worker_url = %worker_url,
|
||||
attempt = request_retries + 1,
|
||||
max_attempts = MAX_REQUEST_RETRIES,
|
||||
"Request failed"
|
||||
);
|
||||
|
||||
request_retries += 1;
|
||||
total_retries += 1;
|
||||
|
||||
if request_retries == MAX_REQUEST_RETRIES {
|
||||
warn!("Removing failed worker: {}", worker_url);
|
||||
warn!(
|
||||
request_id = %request_id,
|
||||
worker_url = %worker_url,
|
||||
"Removing failed worker"
|
||||
);
|
||||
self.remove_worker(&worker_url);
|
||||
break;
|
||||
}
|
||||
@@ -293,6 +314,7 @@ impl Router {
|
||||
typed_req: &T,
|
||||
route: &str,
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
// Handle retries like the original implementation
|
||||
let start = Instant::now();
|
||||
const MAX_REQUEST_RETRIES: u32 = 3;
|
||||
@@ -357,17 +379,19 @@ impl Router {
|
||||
}
|
||||
|
||||
warn!(
|
||||
"Generate request to {} failed (attempt {}/{})",
|
||||
worker_url,
|
||||
request_retries + 1,
|
||||
MAX_REQUEST_RETRIES
|
||||
request_id = %request_id,
|
||||
"Generate request failed route={} worker_url={} attempt={} max_attempts={}",
|
||||
route, worker_url, request_retries + 1, MAX_REQUEST_RETRIES
|
||||
);
|
||||
|
||||
request_retries += 1;
|
||||
total_retries += 1;
|
||||
|
||||
if request_retries == MAX_REQUEST_RETRIES {
|
||||
warn!("Removing failed worker: {}", worker_url);
|
||||
warn!(
|
||||
request_id = %request_id,
|
||||
"Removing failed worker after typed request failures worker_url={}", worker_url
|
||||
);
|
||||
self.remove_worker(&worker_url);
|
||||
break;
|
||||
}
|
||||
@@ -402,13 +426,9 @@ impl Router {
|
||||
is_stream: bool,
|
||||
load_incremented: bool, // Whether load was incremented for this request
|
||||
) -> HttpResponse {
|
||||
let request_id = get_request_id(req);
|
||||
let start = Instant::now();
|
||||
|
||||
// Debug: Log what we're sending
|
||||
if let Ok(json_str) = serde_json::to_string_pretty(typed_req) {
|
||||
debug!("Sending request to {}: {}", route, json_str);
|
||||
}
|
||||
|
||||
let mut request_builder = client
|
||||
.post(format!("{}{}", worker_url, route))
|
||||
.json(typed_req); // Use json() directly with typed request
|
||||
@@ -424,7 +444,11 @@ impl Router {
|
||||
let res = match request_builder.send().await {
|
||||
Ok(res) => res,
|
||||
Err(e) => {
|
||||
error!("Failed to send request to {}: {}", worker_url, e);
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
"Failed to send typed request worker_url={} route={} error={}",
|
||||
worker_url, route, e
|
||||
);
|
||||
|
||||
// Decrement load on error if it was incremented
|
||||
if load_incremented {
|
||||
@@ -497,7 +521,6 @@ impl Router {
|
||||
&worker_url,
|
||||
worker.load(),
|
||||
);
|
||||
debug!("Streaming is done!!")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -536,7 +559,6 @@ impl Router {
|
||||
match client.get(&format!("{}/health", worker_url)).send().await {
|
||||
Ok(res) => {
|
||||
if res.status().is_success() {
|
||||
info!("Worker {} health check passed", worker_url);
|
||||
let mut workers_guard = self.workers.write().unwrap();
|
||||
if workers_guard.iter().any(|w| w.url() == worker_url) {
|
||||
return Err(format!("Worker {} already exists", worker_url));
|
||||
@@ -560,8 +582,8 @@ impl Router {
|
||||
|
||||
return Ok(format!("Successfully added worker: {}", worker_url));
|
||||
} else {
|
||||
info!(
|
||||
"Worker {} health check is pending with status: {}.",
|
||||
debug!(
|
||||
"Worker {} health check pending - status: {}",
|
||||
worker_url,
|
||||
res.status()
|
||||
);
|
||||
@@ -576,10 +598,7 @@ impl Router {
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
info!(
|
||||
"Worker {} health check is pending with error: {}",
|
||||
worker_url, e
|
||||
);
|
||||
debug!("Worker {} health check pending - error: {}", worker_url, e);
|
||||
|
||||
// if the url does not have http or https prefix, warn users
|
||||
if !worker_url.starts_with("http://") && !worker_url.starts_with("https://") {
|
||||
@@ -611,7 +630,6 @@ impl Router {
|
||||
.downcast_ref::<crate::policies::CacheAwarePolicy>()
|
||||
{
|
||||
cache_aware.remove_worker(worker_url);
|
||||
info!("Removed worker from tree: {}", worker_url);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -675,7 +693,6 @@ impl Router {
|
||||
for url in &worker_urls {
|
||||
if let Some(load) = Self::get_worker_load_static(&client, url).await {
|
||||
loads.insert(url.clone(), load);
|
||||
debug!("Worker {} load: {}", url, load);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user