[router] fix radix tree integration issues in PD router (#8982)

This commit is contained in:
Simo Lin
2025-08-08 14:47:51 -07:00
committed by GitHub
parent 1a8706c8b9
commit 7b7e56150e
2 changed files with 76 additions and 165 deletions

View File

@@ -112,7 +112,7 @@ impl CacheAwarePolicy {
}
}
/// Initialize the tree with worker URLs
/// Initialize the tree with worker URLs (used only during initial setup)
pub fn init_workers(&self, workers: &[Box<dyn Worker>]) {
if let Ok(tree) = self.tree.lock() {
for worker in workers {
@@ -121,6 +121,13 @@ impl CacheAwarePolicy {
}
}
/// Register one worker in the tree as an incremental update.
///
/// Inserts an empty-text entry into the tree under `url`. If
/// `self.tree.lock()` returns an error, the call does nothing.
pub fn add_worker(&self, url: &str) {
    // Guard clause: bail out quietly when the lock cannot be acquired.
    let Ok(guard) = self.tree.lock() else {
        return;
    };
    guard.insert("", url);
}
/// Remove a worker from the tree
pub fn remove_worker(&self, url: &str) {
if let Ok(tree) = self.tree.lock() {
@@ -178,6 +185,13 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
.min_by_key(|&&idx| workers[idx].load())
.copied()?;
// Even in imbalanced mode, update the tree to maintain cache state
if let Some(text) = request_text {
if let Ok(tree) = self.tree.lock() {
tree.insert(text, workers[min_load_idx].url());
}
}
// Increment processed counter
workers[min_load_idx].increment_processed();
RouterMetrics::record_processed_request(workers[min_load_idx].url());
@@ -206,21 +220,26 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
};
// Find the index of the selected worker
let selected_idx = workers.iter().position(|w| w.url() == selected_url)?;
if let Some(selected_idx) = workers.iter().position(|w| w.url() == selected_url) {
// Only proceed if the worker is healthy
if workers[selected_idx].is_healthy() {
// Update the tree with this request
tree.insert(text, &selected_url);
// Only proceed if the worker is healthy
if !workers[selected_idx].is_healthy() {
return healthy_indices.first().copied();
// Increment processed counter
workers[selected_idx].increment_processed();
RouterMetrics::record_processed_request(&selected_url);
return Some(selected_idx);
}
} else {
// Selected worker no longer exists, remove it from tree
tree.remove_tenant(&selected_url);
debug!("Removed stale worker {} from cache tree", selected_url);
}
// Update the tree with this request
tree.insert(text, &selected_url);
// Increment processed counter
workers[selected_idx].increment_processed();
RouterMetrics::record_processed_request(&selected_url);
return Some(selected_idx);
// Fallback to first healthy worker
return healthy_indices.first().copied();
}
// Fallback to first healthy worker if tree operations fail