[router] allow one router to support different model families and serving modes (#10244)

This commit is contained in:
Simo Lin
2025-09-12 19:18:27 -04:00
committed by GitHub
parent 321fecab74
commit 2f173ea074
28 changed files with 3528 additions and 837 deletions

View File

@@ -5,17 +5,20 @@
use crate::core::Worker;
use std::fmt::Debug;
use std::sync::Arc;
mod cache_aware;
mod factory;
mod power_of_two;
mod random;
mod registry;
mod round_robin;
pub use cache_aware::CacheAwarePolicy;
pub use factory::PolicyFactory;
pub use power_of_two::PowerOfTwoPolicy;
pub use random::RandomPolicy;
pub use registry::PolicyRegistry;
pub use round_robin::RoundRobinPolicy;
/// Core trait for load balancing policies
@@ -26,9 +29,10 @@ pub trait LoadBalancingPolicy: Send + Sync + Debug {
/// Select a single worker from the available workers
///
/// This is used for regular routing mode where requests go to a single worker.
/// Workers are passed as `Arc<dyn Worker>` for better performance and to avoid unnecessary cloning.
fn select_worker(
&self,
workers: &[Box<dyn Worker>],
workers: &[Arc<dyn Worker>],
request_text: Option<&str>,
) -> Option<usize>;
@@ -38,8 +42,8 @@ pub trait LoadBalancingPolicy: Send + Sync + Debug {
/// Default implementation uses select_worker for each array independently.
fn select_worker_pair(
&self,
prefill_workers: &[Box<dyn Worker>],
decode_workers: &[Box<dyn Worker>],
prefill_workers: &[Arc<dyn Worker>],
decode_workers: &[Arc<dyn Worker>],
request_text: Option<&str>,
) -> Option<(usize, usize)> {
// Default implementation: independently select from each pool
@@ -105,7 +109,7 @@ impl Default for CacheAwareConfig {
}
/// Helper function to filter healthy workers and return their indices
pub(crate) fn get_healthy_worker_indices(workers: &[Box<dyn Worker>]) -> Vec<usize> {
pub(crate) fn get_healthy_worker_indices(workers: &[Arc<dyn Worker>]) -> Vec<usize> {
workers
.iter()
.enumerate()
@@ -121,16 +125,16 @@ mod tests {
#[test]
fn test_get_healthy_worker_indices() {
let workers: Vec<Box<dyn Worker>> = vec![
Box::new(BasicWorker::new(
let workers: Vec<Arc<dyn Worker>> = vec![
Arc::new(BasicWorker::new(
"http://w1:8000".to_string(),
WorkerType::Regular,
)),
Box::new(BasicWorker::new(
Arc::new(BasicWorker::new(
"http://w2:8000".to_string(),
WorkerType::Regular,
)),
Box::new(BasicWorker::new(
Arc::new(BasicWorker::new(
"http://w3:8000".to_string(),
WorkerType::Regular,
)),