[router] allow one router to support different model families and serving mode (#10244)
This commit is contained in:
@@ -27,9 +27,9 @@ use tracing::{info, warn};
|
||||
#[allow(dead_code)] // Fields will be used once implementation is complete
|
||||
pub struct GrpcPDRouter {
|
||||
/// Prefill worker connections
|
||||
prefill_workers: Arc<RwLock<Vec<Box<dyn Worker>>>>,
|
||||
prefill_workers: Arc<RwLock<Vec<Arc<dyn Worker>>>>,
|
||||
/// Decode worker connections
|
||||
decode_workers: Arc<RwLock<Vec<Box<dyn Worker>>>>,
|
||||
decode_workers: Arc<RwLock<Vec<Arc<dyn Worker>>>>,
|
||||
/// gRPC clients for prefill workers
|
||||
prefill_grpc_clients: Arc<RwLock<HashMap<String, SglangSchedulerClient>>>,
|
||||
/// gRPC clients for decode workers
|
||||
@@ -127,7 +127,7 @@ impl GrpcPDRouter {
|
||||
}
|
||||
|
||||
// Create Prefill Worker trait objects with gRPC connection mode
|
||||
let prefill_workers: Vec<Box<dyn Worker>> = prefill_urls
|
||||
let prefill_workers: Vec<Arc<dyn Worker>> = prefill_urls
|
||||
.iter()
|
||||
.map(|(url, bootstrap_port)| {
|
||||
let worker = BasicWorker::with_connection_mode(
|
||||
@@ -147,12 +147,12 @@ impl GrpcPDRouter {
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
Box::new(worker) as Box<dyn Worker>
|
||||
Arc::new(worker) as Arc<dyn Worker>
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Create Decode Worker trait objects with gRPC connection mode
|
||||
let decode_workers: Vec<Box<dyn Worker>> = decode_urls
|
||||
let decode_workers: Vec<Arc<dyn Worker>> = decode_urls
|
||||
.iter()
|
||||
.map(|url| {
|
||||
let worker = BasicWorker::with_connection_mode(
|
||||
@@ -168,7 +168,7 @@ impl GrpcPDRouter {
|
||||
failure_threshold: ctx.router_config.health_check.failure_threshold,
|
||||
success_threshold: ctx.router_config.health_check.success_threshold,
|
||||
});
|
||||
Box::new(worker) as Box<dyn Worker>
|
||||
Arc::new(worker) as Arc<dyn Worker>
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -269,6 +269,7 @@ impl RouterTrait for GrpcPDRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::GenerateRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -277,6 +278,7 @@ impl RouterTrait for GrpcPDRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ChatCompletionRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -285,6 +287,7 @@ impl RouterTrait for GrpcPDRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::CompletionRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -293,6 +296,7 @@ impl RouterTrait for GrpcPDRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ResponsesRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -305,6 +309,7 @@ impl RouterTrait for GrpcPDRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::RerankRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ use tracing::{info, warn};
|
||||
#[allow(dead_code)] // Fields will be used once implementation is complete
|
||||
pub struct GrpcRouter {
|
||||
/// Worker connections
|
||||
workers: Arc<RwLock<Vec<Box<dyn Worker>>>>,
|
||||
workers: Arc<RwLock<Vec<Arc<dyn Worker>>>>,
|
||||
/// gRPC clients for each worker
|
||||
grpc_clients: Arc<RwLock<HashMap<String, SglangSchedulerClient>>>,
|
||||
/// Load balancing policy
|
||||
@@ -103,7 +103,7 @@ impl GrpcRouter {
|
||||
}
|
||||
|
||||
// Create Worker trait objects with gRPC connection mode
|
||||
let mut workers: Vec<Box<dyn Worker>> = Vec::new();
|
||||
let mut workers: Vec<Arc<dyn Worker>> = Vec::new();
|
||||
|
||||
// Move clients from the HashMap to the workers
|
||||
for url in &worker_urls {
|
||||
@@ -123,7 +123,7 @@ impl GrpcRouter {
|
||||
})
|
||||
.with_grpc_client(client);
|
||||
|
||||
workers.push(Box::new(worker) as Box<dyn Worker>);
|
||||
workers.push(Arc::new(worker) as Arc<dyn Worker>);
|
||||
} else {
|
||||
warn!("No gRPC client for worker {}, skipping", url);
|
||||
}
|
||||
@@ -202,6 +202,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::GenerateRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -210,6 +211,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ChatCompletionRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -218,6 +220,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::CompletionRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -226,6 +229,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ResponsesRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -238,6 +242,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::RerankRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user