[pd-router] Add Configurable Retry Logic to reduce backend pressure (#8744)
This commit is contained in:
@@ -8,7 +8,7 @@ use axum::{
|
||||
use common::mock_worker::{HealthStatus, MockWorker, MockWorkerConfig, WorkerType};
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
use sglang_router_rs::config::{PolicyConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::config::{PolicyConfig, RetryConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::routers::{RouterFactory, RouterTrait};
|
||||
use std::sync::Arc;
|
||||
use tower::ServiceExt;
|
||||
@@ -44,6 +44,7 @@ impl TestContext {
|
||||
request_id_headers: None,
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
Self::new_with_config(config, worker_configs).await
|
||||
@@ -1085,6 +1086,7 @@ mod error_tests {
|
||||
request_id_headers: None,
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
let ctx = TestContext::new_with_config(
|
||||
@@ -1431,6 +1433,7 @@ mod pd_mode_tests {
|
||||
request_id_headers: None,
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
// Create app context
|
||||
@@ -1584,6 +1587,7 @@ mod request_id_tests {
|
||||
request_id_headers: Some(vec!["custom-id".to_string(), "trace-id".to_string()]),
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
let ctx = TestContext::new_with_config(
|
||||
|
||||
@@ -3,7 +3,7 @@ mod common;
|
||||
use common::mock_worker::{HealthStatus, MockWorker, MockWorkerConfig, WorkerType};
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
use sglang_router_rs::config::{PolicyConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::config::{PolicyConfig, RetryConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::routers::{RouterFactory, RouterTrait};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -35,6 +35,7 @@ impl TestContext {
|
||||
request_id_headers: None,
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
let mut workers = Vec::new();
|
||||
|
||||
@@ -4,7 +4,7 @@ use common::mock_worker::{HealthStatus, MockWorker, MockWorkerConfig, WorkerType
|
||||
use futures_util::StreamExt;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
use sglang_router_rs::config::{PolicyConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::config::{PolicyConfig, RetryConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::routers::{RouterFactory, RouterTrait};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -36,6 +36,7 @@ impl TestContext {
|
||||
request_id_headers: None,
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
let mut workers = Vec::new();
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
mod test_pd_routing {
|
||||
use rand::Rng;
|
||||
use serde_json::json;
|
||||
use sglang_router_rs::config::{PolicyConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::config::{PolicyConfig, RetryConfig, RouterConfig, RoutingMode};
|
||||
use sglang_router_rs::core::{WorkerFactory, WorkerType};
|
||||
use sglang_router_rs::routers::pd_types::get_hostname;
|
||||
use sglang_router_rs::routers::pd_types::PDSelectionPolicy;
|
||||
@@ -178,6 +178,7 @@ mod test_pd_routing {
|
||||
request_id_headers: None,
|
||||
max_concurrent_requests: 64,
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
};
|
||||
|
||||
// Router creation will fail due to health checks, but config should be valid
|
||||
|
||||
Reference in New Issue
Block a user