[router] consolidate worker load monitoring (#10894)
This commit is contained in:
@@ -257,6 +257,47 @@ impl PolicyRegistry {
|
||||
.unwrap_or_else(|| self.get_default_policy())
|
||||
}
|
||||
|
||||
/// Get all PowerOfTwo policies that need load updates
|
||||
pub fn get_all_power_of_two_policies(&self) -> Vec<Arc<dyn LoadBalancingPolicy>> {
|
||||
let mut power_of_two_policies = Vec::new();
|
||||
|
||||
if self.default_policy.name() == "power_of_two" {
|
||||
power_of_two_policies.push(Arc::clone(&self.default_policy));
|
||||
}
|
||||
|
||||
if let Some(ref policy) = *self.prefill_policy.read().unwrap() {
|
||||
if policy.name() == "power_of_two" && !Arc::ptr_eq(policy, &self.default_policy) {
|
||||
power_of_two_policies.push(Arc::clone(policy));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref policy) = *self.decode_policy.read().unwrap() {
|
||||
if policy.name() == "power_of_two"
|
||||
&& !Arc::ptr_eq(policy, &self.default_policy)
|
||||
&& !self
|
||||
.prefill_policy
|
||||
.read()
|
||||
.unwrap()
|
||||
.as_ref()
|
||||
.is_some_and(|p| Arc::ptr_eq(p, policy))
|
||||
{
|
||||
power_of_two_policies.push(Arc::clone(policy));
|
||||
}
|
||||
}
|
||||
|
||||
let model_policies = self.model_policies.read().unwrap();
|
||||
for policy in model_policies.values() {
|
||||
if policy.name() == "power_of_two" {
|
||||
let already_added = power_of_two_policies.iter().any(|p| Arc::ptr_eq(p, policy));
|
||||
if !already_added {
|
||||
power_of_two_policies.push(Arc::clone(policy));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
power_of_two_policies
|
||||
}
|
||||
|
||||
/// Initialize cache-aware policy with workers if applicable
|
||||
/// This should be called after workers are registered for a model
|
||||
pub fn init_cache_aware_policy(&self, model_id: &str, workers: &[Arc<dyn Worker>]) {
|
||||
|
||||
Reference in New Issue
Block a user