[router] address worker load tracking consistency (#9523)
Co-authored-by: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
This commit is contained in:
@@ -490,6 +490,13 @@ impl Router {
|
||||
false
|
||||
};
|
||||
|
||||
// Keep a clone for potential cleanup on retry
|
||||
let worker_for_cleanup = if load_incremented {
|
||||
Some(worker.clone_worker())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let response = self
|
||||
.send_typed_request(
|
||||
headers,
|
||||
@@ -502,6 +509,19 @@ impl Router {
|
||||
.await;
|
||||
|
||||
worker.record_outcome(response.status().is_success());
|
||||
|
||||
// For retryable failures, we need to decrement load since send_typed_request
|
||||
// won't have done it (it only decrements on success or non-retryable failures)
|
||||
if is_retryable_status(response.status()) && load_incremented {
|
||||
if let Some(cleanup_worker) = worker_for_cleanup {
|
||||
cleanup_worker.decrement_load();
|
||||
RouterMetrics::set_running_requests(
|
||||
cleanup_worker.url(),
|
||||
cleanup_worker.load(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
},
|
||||
// should_retry predicate
|
||||
@@ -657,13 +677,25 @@ impl Router {
|
||||
response
|
||||
}
|
||||
Err(e) => {
|
||||
// IMPORTANT: Decrement load on error before returning
|
||||
if load_incremented {
|
||||
if let Ok(workers_guard) = self.workers.read() {
|
||||
if let Some(worker) =
|
||||
workers_guard.iter().find(|w| w.url() == worker_url)
|
||||
{
|
||||
worker.decrement_load();
|
||||
RouterMetrics::set_running_requests(worker_url, worker.load());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let error_msg = format!("Failed to get response body: {}", e);
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, error_msg).into_response()
|
||||
}
|
||||
};
|
||||
|
||||
// Decrement load counter for non-streaming requests if it was incremented
|
||||
if load_incremented && !is_stream {
|
||||
if load_incremented {
|
||||
if let Ok(workers_guard) = self.workers.read() {
|
||||
if let Some(worker) = workers_guard.iter().find(|w| w.url() == worker_url) {
|
||||
worker.decrement_load();
|
||||
|
||||
Reference in New Issue
Block a user