[router] refactor worker to builder pattern 3/n (#10647)
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{
|
||||
BasicWorker, CircuitBreakerConfig, HealthChecker, HealthConfig, Worker, WorkerType,
|
||||
BasicWorkerBuilder, CircuitBreakerConfig, HealthChecker, HealthConfig, Worker, WorkerType,
|
||||
};
|
||||
use crate::grpc::SglangSchedulerClient;
|
||||
use crate::metrics::RouterMetrics;
|
||||
@@ -130,23 +130,22 @@ impl GrpcPDRouter {
|
||||
let prefill_workers: Vec<Arc<dyn Worker>> = prefill_urls
|
||||
.iter()
|
||||
.map(|(url, bootstrap_port)| {
|
||||
let worker = BasicWorker::with_connection_mode(
|
||||
url.clone(),
|
||||
WorkerType::Prefill {
|
||||
let worker = BasicWorkerBuilder::new(url.clone())
|
||||
.worker_type(WorkerType::Prefill {
|
||||
bootstrap_port: *bootstrap_port,
|
||||
},
|
||||
crate::core::ConnectionMode::Grpc {
|
||||
})
|
||||
.connection_mode(crate::core::ConnectionMode::Grpc {
|
||||
port: *bootstrap_port,
|
||||
},
|
||||
)
|
||||
.with_circuit_breaker_config(core_cb_config.clone())
|
||||
.with_health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
})
|
||||
.circuit_breaker_config(core_cb_config.clone())
|
||||
.health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
})
|
||||
.build();
|
||||
Arc::new(worker) as Arc<dyn Worker>
|
||||
})
|
||||
.collect();
|
||||
@@ -155,19 +154,18 @@ impl GrpcPDRouter {
|
||||
let decode_workers: Vec<Arc<dyn Worker>> = decode_urls
|
||||
.iter()
|
||||
.map(|url| {
|
||||
let worker = BasicWorker::with_connection_mode(
|
||||
url.clone(),
|
||||
WorkerType::Decode,
|
||||
crate::core::ConnectionMode::Grpc { port: None },
|
||||
)
|
||||
.with_circuit_breaker_config(core_cb_config.clone())
|
||||
.with_health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
let worker = BasicWorkerBuilder::new(url.clone())
|
||||
.worker_type(WorkerType::Decode)
|
||||
.connection_mode(crate::core::ConnectionMode::Grpc { port: None })
|
||||
.circuit_breaker_config(core_cb_config.clone())
|
||||
.health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
})
|
||||
.build();
|
||||
Arc::new(worker) as Arc<dyn Worker>
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{
|
||||
BasicWorker, CircuitBreakerConfig, HealthChecker, HealthConfig, Worker, WorkerType,
|
||||
BasicWorkerBuilder, CircuitBreakerConfig, HealthChecker, HealthConfig, Worker, WorkerType,
|
||||
};
|
||||
use crate::grpc::SglangSchedulerClient;
|
||||
use crate::metrics::RouterMetrics;
|
||||
@@ -108,20 +108,19 @@ impl GrpcRouter {
|
||||
// Move clients from the HashMap to the workers
|
||||
for url in &worker_urls {
|
||||
if let Some(client) = grpc_clients.remove(url) {
|
||||
let worker = BasicWorker::with_connection_mode(
|
||||
url.clone(),
|
||||
WorkerType::Regular,
|
||||
crate::core::ConnectionMode::Grpc { port: None },
|
||||
)
|
||||
.with_circuit_breaker_config(core_cb_config.clone())
|
||||
.with_health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
})
|
||||
.with_grpc_client(client);
|
||||
let worker = BasicWorkerBuilder::new(url.clone())
|
||||
.worker_type(WorkerType::Regular)
|
||||
.connection_mode(crate::core::ConnectionMode::Grpc { port: None })
|
||||
.circuit_breaker_config(core_cb_config.clone())
|
||||
.health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
})
|
||||
.grpc_client(client)
|
||||
.build();
|
||||
|
||||
workers.push(Arc::new(worker) as Arc<dyn Worker>);
|
||||
} else {
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
use super::pd_types::{api_path, PDRouterError};
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{
|
||||
is_retryable_status, BasicWorker, CircuitBreakerConfig, HealthConfig, RetryExecutor, Worker,
|
||||
WorkerFactory, WorkerLoadGuard, WorkerRegistry, WorkerType,
|
||||
is_retryable_status, BasicWorkerBuilder, CircuitBreakerConfig, HealthConfig, RetryExecutor,
|
||||
Worker, WorkerFactory, WorkerLoadGuard, WorkerRegistry, WorkerType,
|
||||
};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::{LoadBalancingPolicy, PolicyRegistry};
|
||||
@@ -389,34 +389,35 @@ impl PDRouter {
|
||||
|
||||
// Register prefill workers in the registry
|
||||
for (url, port) in prefill_urls {
|
||||
let worker = BasicWorker::new(
|
||||
url,
|
||||
WorkerType::Prefill {
|
||||
let worker = BasicWorkerBuilder::new(url)
|
||||
.worker_type(WorkerType::Prefill {
|
||||
bootstrap_port: port,
|
||||
},
|
||||
)
|
||||
.with_circuit_breaker_config(core_cb_config.clone())
|
||||
.with_health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
ctx.worker_registry.register(Arc::new(worker));
|
||||
}
|
||||
|
||||
// Register decode workers in the registry
|
||||
for url in decode_urls {
|
||||
let worker = BasicWorker::new(url, WorkerType::Decode)
|
||||
.with_circuit_breaker_config(core_cb_config.clone())
|
||||
.with_health_config(HealthConfig {
|
||||
})
|
||||
.circuit_breaker_config(core_cb_config.clone())
|
||||
.health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
})
|
||||
.build();
|
||||
ctx.worker_registry.register(Arc::new(worker));
|
||||
}
|
||||
|
||||
// Register decode workers in the registry
|
||||
for url in decode_urls {
|
||||
let worker = BasicWorkerBuilder::new(url)
|
||||
.worker_type(WorkerType::Decode)
|
||||
.circuit_breaker_config(core_cb_config.clone())
|
||||
.health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
})
|
||||
.build();
|
||||
ctx.worker_registry.register(Arc::new(worker));
|
||||
}
|
||||
|
||||
@@ -2116,7 +2117,7 @@ impl RouterTrait for PDRouter {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::core::{BasicWorker, WorkerType};
|
||||
use crate::core::WorkerType;
|
||||
|
||||
fn create_test_pd_router() -> PDRouter {
|
||||
let worker_registry = Arc::new(WorkerRegistry::new());
|
||||
@@ -2139,7 +2140,9 @@ mod tests {
|
||||
}
|
||||
|
||||
fn create_test_worker(url: String, worker_type: WorkerType, healthy: bool) -> Box<dyn Worker> {
|
||||
let worker = BasicWorker::new(url, worker_type);
|
||||
let worker = BasicWorkerBuilder::new(url)
|
||||
.worker_type(worker_type)
|
||||
.build();
|
||||
worker.set_healthy(healthy);
|
||||
Box::new(worker)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{
|
||||
is_retryable_status, BasicWorker, CircuitBreakerConfig, HealthConfig, RetryExecutor, Worker,
|
||||
WorkerRegistry, WorkerType,
|
||||
is_retryable_status, BasicWorkerBuilder, CircuitBreakerConfig, HealthConfig, RetryExecutor,
|
||||
Worker, WorkerRegistry, WorkerType,
|
||||
};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::{LoadBalancingPolicy, PolicyRegistry};
|
||||
@@ -87,15 +87,17 @@ impl Router {
|
||||
for url in &worker_urls {
|
||||
// TODO: In IGW mode, fetch model_id from worker's /get_model_info endpoint
|
||||
// For now, create worker without model_id
|
||||
let worker = BasicWorker::new(url.clone(), WorkerType::Regular)
|
||||
.with_circuit_breaker_config(core_cb_config.clone())
|
||||
.with_health_config(HealthConfig {
|
||||
let worker = BasicWorkerBuilder::new(url.clone())
|
||||
.worker_type(WorkerType::Regular)
|
||||
.circuit_breaker_config(core_cb_config.clone())
|
||||
.health_config(HealthConfig {
|
||||
timeout_secs: ctx.router_config.health_check.timeout_secs,
|
||||
check_interval_secs: ctx.router_config.health_check.check_interval_secs,
|
||||
endpoint: ctx.router_config.health_check.endpoint.clone(),
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
})
|
||||
.build();
|
||||
|
||||
let worker_arc = Arc::new(worker);
|
||||
ctx.worker_registry.register(worker_arc.clone());
|
||||
@@ -991,11 +993,10 @@ impl Router {
|
||||
}
|
||||
info!("Added worker: {}", dp_url);
|
||||
// TODO: In IGW mode, fetch model_id from worker's /get_model_info endpoint
|
||||
let new_worker =
|
||||
BasicWorker::new(dp_url.to_string(), WorkerType::Regular)
|
||||
.with_circuit_breaker_config(
|
||||
self.circuit_breaker_config.clone(),
|
||||
);
|
||||
let new_worker = BasicWorkerBuilder::new(dp_url.to_string())
|
||||
.worker_type(WorkerType::Regular)
|
||||
.circuit_breaker_config(self.circuit_breaker_config.clone())
|
||||
.build();
|
||||
|
||||
let worker_arc = Arc::new(new_worker);
|
||||
self.worker_registry.register(worker_arc.clone());
|
||||
@@ -1028,11 +1029,10 @@ impl Router {
|
||||
info!("Added worker: {}", worker_url);
|
||||
|
||||
// TODO: In IGW mode, fetch model_id from worker's /get_model_info endpoint
|
||||
let new_worker =
|
||||
BasicWorker::new(worker_url.to_string(), WorkerType::Regular)
|
||||
.with_circuit_breaker_config(
|
||||
self.circuit_breaker_config.clone(),
|
||||
);
|
||||
let new_worker = BasicWorkerBuilder::new(worker_url.to_string())
|
||||
.worker_type(WorkerType::Regular)
|
||||
.circuit_breaker_config(self.circuit_breaker_config.clone())
|
||||
.build();
|
||||
|
||||
let worker_arc = Arc::new(new_worker);
|
||||
self.worker_registry.register(worker_arc.clone());
|
||||
@@ -1595,8 +1595,12 @@ mod tests {
|
||||
));
|
||||
|
||||
// Register test workers
|
||||
let worker1 = BasicWorker::new("http://worker1:8080".to_string(), WorkerType::Regular);
|
||||
let worker2 = BasicWorker::new("http://worker2:8080".to_string(), WorkerType::Regular);
|
||||
let worker1 = BasicWorkerBuilder::new("http://worker1:8080")
|
||||
.worker_type(WorkerType::Regular)
|
||||
.build();
|
||||
let worker2 = BasicWorkerBuilder::new("http://worker2:8080")
|
||||
.worker_type(WorkerType::Regular)
|
||||
.build();
|
||||
worker_registry.register(Arc::new(worker1));
|
||||
worker_registry.register(Arc::new(worker2));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user