[router] migrate router from actix to axum (#8479)

2025-07-30 17:47:19 -07:00
parent 299803343d
commit 66a398f49d
18 changed files with 3626 additions and 3549 deletions
--- a/sgl-router/src/routers/mod.rs
+++ b/sgl-router/src/routers/mod.rs
@@ -1,10 +1,17 @@
 //! Router implementations

-use actix_web::{HttpRequest, HttpResponse};
 use async_trait::async_trait;
+use axum::{
+    body::Body,
+    extract::Request,
+    http::{HeaderMap, StatusCode},
+    response::{IntoResponse, Response},
+};
 use reqwest::Client;
 use std::fmt::Debug;

+use crate::openai_api_types::{ChatCompletionRequest, CompletionRequest, GenerateRequest};
+
 pub mod factory;
 pub mod pd_router;
 pub mod pd_types;
@@ -33,54 +40,55 @@ pub trait WorkerManagement: Send + Sync {
 ///
 /// This trait provides a unified interface for routing requests,
 /// regardless of whether it's a regular router or PD router.
-#[async_trait(?Send)]
+#[async_trait]
 pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
    /// Get a reference to self as Any for downcasting
    fn as_any(&self) -> &dyn std::any::Any;
+
    /// Route a health check request
-    async fn health(&self, client: &Client, req: &HttpRequest) -> HttpResponse;
+    async fn health(&self, client: &Client, req: Request<Body>) -> Response;

    /// Route a health generate request
-    async fn health_generate(&self, client: &Client, req: &HttpRequest) -> HttpResponse;
+    async fn health_generate(&self, client: &Client, req: Request<Body>) -> Response;

    /// Get server information
-    async fn get_server_info(&self, client: &Client, req: &HttpRequest) -> HttpResponse;
+    async fn get_server_info(&self, client: &Client, req: Request<Body>) -> Response;

    /// Get available models
-    async fn get_models(&self, client: &Client, req: &HttpRequest) -> HttpResponse;
+    async fn get_models(&self, client: &Client, req: Request<Body>) -> Response;

    /// Get model information
-    async fn get_model_info(&self, client: &Client, req: &HttpRequest) -> HttpResponse;
+    async fn get_model_info(&self, client: &Client, req: Request<Body>) -> Response;

    /// Route a generate request
    async fn route_generate(
        &self,
        client: &Client,
-        req: &HttpRequest,
-        body: serde_json::Value,
-    ) -> HttpResponse;
+        headers: Option<&HeaderMap>,
+        body: &GenerateRequest,
+    ) -> Response;

    /// Route a chat completion request
    async fn route_chat(
        &self,
        client: &Client,
-        req: &HttpRequest,
-        body: serde_json::Value,
-    ) -> HttpResponse;
+        headers: Option<&HeaderMap>,
+        body: &ChatCompletionRequest,
+    ) -> Response;

    /// Route a completion request
    async fn route_completion(
        &self,
        client: &Client,
-        req: &HttpRequest,
-        body: serde_json::Value,
-    ) -> HttpResponse;
+        headers: Option<&HeaderMap>,
+        body: &CompletionRequest,
+    ) -> Response;

    /// Flush cache on all workers
-    async fn flush_cache(&self, client: &Client) -> HttpResponse;
+    async fn flush_cache(&self, client: &Client) -> Response;

    /// Get worker loads (for monitoring)
-    async fn get_worker_loads(&self, client: &Client) -> HttpResponse;
+    async fn get_worker_loads(&self, client: &Client) -> Response;

    /// Get router type name
    fn router_type(&self) -> &'static str;
@@ -91,11 +99,11 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
    }

    /// Server liveness check - is the server process running
-    fn liveness(&self) -> HttpResponse {
+    fn liveness(&self) -> Response {
        // Simple liveness check - if we can respond, we're alive
-        HttpResponse::Ok().body("OK")
+        (StatusCode::OK, "OK").into_response()
    }

    /// Server readiness check - is the server ready to handle requests
-    fn readiness(&self) -> HttpResponse;
+    fn readiness(&self) -> Response;
 }