[pd-router] Add Configurable Retry Logic to reduce backend pressure (#8744)

This commit is contained in:
Simo Lin
2025-08-04 20:42:07 -07:00
committed by GitHub
parent d98a4913ea
commit 354ac43555
10 changed files with 502 additions and 293 deletions

View File

@@ -19,7 +19,7 @@ pub enum PolicyType {
Random,
RoundRobin,
CacheAware,
PowerOfTwo, // Moved from PD-specific, now shared
PowerOfTwo,
}
#[pyclass]
@@ -45,7 +45,6 @@ struct Router {
selector: HashMap<String, String>,
service_discovery_port: u16,
service_discovery_namespace: Option<String>,
// PD service discovery fields
prefill_selector: HashMap<String, String>,
decode_selector: HashMap<String, String>,
bootstrap_port_annotation: String,
@@ -53,14 +52,11 @@ struct Router {
prometheus_host: Option<String>,
request_timeout_secs: u64,
request_id_headers: Option<Vec<String>>,
// PD mode flag
pd_disaggregation: bool,
// PD-specific fields (only used when pd_disaggregation is true)
prefill_urls: Option<Vec<(String, Option<u16>)>>,
decode_urls: Option<Vec<String>>,
prefill_policy: Option<PolicyType>,
decode_policy: Option<PolicyType>,
// Additional server config fields
max_concurrent_requests: usize,
cors_allowed_origins: Vec<String>,
}
@@ -150,6 +146,7 @@ impl Router {
request_id_headers: self.request_id_headers.clone(),
max_concurrent_requests: self.max_concurrent_requests,
cors_allowed_origins: self.cors_allowed_origins.clone(),
retry: config::RetryConfig::default(),
})
}
}
@@ -289,7 +286,6 @@ impl Router {
check_interval: std::time::Duration::from_secs(60),
port: self.service_discovery_port,
namespace: self.service_discovery_namespace.clone(),
// PD mode configuration
pd_mode: self.pd_disaggregation,
prefill_selector: self.prefill_selector.clone(),
decode_selector: self.decode_selector.clone(),