// PD (Prefill-Decode) gRPC Router Implementation
// TODO: Implement gRPC-based PD router for disaggregated prefill-decode systems

use crate::routers::{RouterTrait, WorkerManagement};
use async_trait::async_trait;
use axum::{
    body::Body,
    extract::Request,
    http::{HeaderMap, StatusCode},
    response::{IntoResponse, Response},
};

/// Placeholder for the gRPC PD (Prefill-Decode) router.
///
/// This is a field-less unit struct: it carries no state. The real
/// implementation is still pending (see the TODO at the top of this file).
#[derive(Debug)]
pub struct GrpcPDRouter;

impl GrpcPDRouter {
|
||
|
|
pub async fn new() -> Result<Self, String> {
|
||
|
|
// TODO: Implement gRPC PD router initialization
|
||
|
|
Err("gRPC PD router not yet implemented".to_string())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl RouterTrait for GrpcPDRouter {
|
||
|
|
fn as_any(&self) -> &dyn std::any::Any {
|
||
|
|
self
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn health(&self, _req: Request<Body>) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn health_generate(&self, _req: Request<Body>) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn get_server_info(&self, _req: Request<Body>) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn get_models(&self, _req: Request<Body>) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn get_model_info(&self, _req: Request<Body>) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn route_generate(
|
||
|
|
&self,
|
||
|
|
_headers: Option<&HeaderMap>,
|
||
|
|
_body: &crate::protocols::spec::GenerateRequest,
|
||
|
|
) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn route_chat(
|
||
|
|
&self,
|
||
|
|
_headers: Option<&HeaderMap>,
|
||
|
|
_body: &crate::protocols::spec::ChatCompletionRequest,
|
||
|
|
) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn route_completion(
|
||
|
|
&self,
|
||
|
|
_headers: Option<&HeaderMap>,
|
||
|
|
_body: &crate::protocols::spec::CompletionRequest,
|
||
|
|
) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn route_rerank(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn flush_cache(&self) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
async fn get_worker_loads(&self) -> Response {
|
||
|
|
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn router_type(&self) -> &'static str {
|
||
|
|
"grpc_pd"
|
||
|
|
}
|
||
|
|
|
||
|
|
fn readiness(&self) -> Response {
|
||
|
|
(StatusCode::SERVICE_UNAVAILABLE).into_response()
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl WorkerManagement for GrpcPDRouter {
|
||
|
|
async fn add_worker(&self, _worker_url: &str) -> Result<String, String> {
|
||
|
|
Err("Not implemented".to_string())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn remove_worker(&self, _worker_url: &str) {}
|
||
|
|
|
||
|
|
fn get_worker_urls(&self) -> Vec<String> {
|
||
|
|
vec![]
|
||
|
|
}
|
||
|
|
}
|