[router] consolidate worker load monitoring (#10894)

This commit is contained in:
Simo Lin
2025-09-25 09:59:30 -04:00
committed by GitHub
parent 77830a265e
commit d511b2d905
7 changed files with 199 additions and 232 deletions

View File

@@ -257,6 +257,47 @@ impl PolicyRegistry {
.unwrap_or_else(|| self.get_default_policy())
}
/// Get all PowerOfTwo policies that need load updates.
///
/// Sources are visited in a fixed order: the default policy, the prefill
/// policy, the decode policy, then every per-model policy. A policy is
/// returned when its `name()` is exactly `"power_of_two"`. Slots that share
/// the same `Arc` allocation (compared with `Arc::ptr_eq`) contribute a
/// single entry, so each distinct policy instance appears at most once.
///
/// # Panics
///
/// Panics if the `prefill_policy`, `decode_policy` or `model_policies` lock
/// is poisoned.
pub fn get_all_power_of_two_policies(&self) -> Vec<Arc<dyn LoadBalancingPolicy>> {
    let mut collected: Vec<Arc<dyn LoadBalancingPolicy>> = Vec::new();

    // A single rule for every source: keep the policy only if it is a
    // power-of-two policy whose allocation has not been collected yet.
    // Checking against `collected` (instead of comparing slots pairwise)
    // means the prefill lock never has to be re-acquired while the decode
    // guard is still held.
    let mut add_if_new = |candidate: &Arc<dyn LoadBalancingPolicy>| {
        if candidate.name() == "power_of_two"
            && !collected.iter().any(|seen| Arc::ptr_eq(seen, candidate))
        {
            collected.push(Arc::clone(candidate));
        }
    };

    add_if_new(&self.default_policy);

    // Each read guard lives only for the statement that inspects its slot.
    if let Some(policy) = self.prefill_policy.read().unwrap().as_ref() {
        add_if_new(policy);
    }
    if let Some(policy) = self.decode_policy.read().unwrap().as_ref() {
        add_if_new(policy);
    }
    for policy in self.model_policies.read().unwrap().values() {
        add_if_new(policy);
    }

    collected
}
/// Initialize cache-aware policy with workers if applicable
/// This should be called after workers are registered for a model
pub fn init_cache_aware_policy(&self, model_id: &str, workers: &[Arc<dyn Worker>]) {