[router] allow one router to support different model families and serving mode (#10244)
This commit is contained in:
@@ -27,7 +27,7 @@ use tracing::{info, warn};
|
||||
#[allow(dead_code)] // Fields will be used once implementation is complete
|
||||
pub struct GrpcRouter {
|
||||
/// Worker connections
|
||||
workers: Arc<RwLock<Vec<Box<dyn Worker>>>>,
|
||||
workers: Arc<RwLock<Vec<Arc<dyn Worker>>>>,
|
||||
/// gRPC clients for each worker
|
||||
grpc_clients: Arc<RwLock<HashMap<String, SglangSchedulerClient>>>,
|
||||
/// Load balancing policy
|
||||
@@ -103,7 +103,7 @@ impl GrpcRouter {
|
||||
}
|
||||
|
||||
// Create Worker trait objects with gRPC connection mode
|
||||
let mut workers: Vec<Box<dyn Worker>> = Vec::new();
|
||||
let mut workers: Vec<Arc<dyn Worker>> = Vec::new();
|
||||
|
||||
// Move clients from the HashMap to the workers
|
||||
for url in &worker_urls {
|
||||
@@ -123,7 +123,7 @@ impl GrpcRouter {
|
||||
})
|
||||
.with_grpc_client(client);
|
||||
|
||||
workers.push(Box::new(worker) as Box<dyn Worker>);
|
||||
workers.push(Arc::new(worker) as Arc<dyn Worker>);
|
||||
} else {
|
||||
warn!("No gRPC client for worker {}, skipping", url);
|
||||
}
|
||||
@@ -202,6 +202,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::GenerateRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -210,6 +211,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ChatCompletionRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -218,6 +220,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::CompletionRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -226,6 +229,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ResponsesRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
@@ -238,6 +242,7 @@ impl RouterTrait for GrpcRouter {
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::RerankRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user