From ee704e62654c225bc63a4361ca1756fdeea0c264 Mon Sep 17 00:00:00 2001 From: Chang Su Date: Tue, 23 Sep 2025 16:07:34 -0700 Subject: [PATCH] [router] add auth middleware for api key auth (#10826) --- sgl-router/Cargo.toml | 1 + sgl-router/README.md | 76 +++++++++++++++++++ sgl-router/py_test/e2e/test_regular_router.py | 20 +++-- sgl-router/src/middleware.rs | 48 +++++++++++- sgl-router/src/server.rs | 44 ++++++++++- sgl-router/tests/common/test_app.rs | 13 ++++ 6 files changed, 186 insertions(+), 16 deletions(-) diff --git a/sgl-router/Cargo.toml b/sgl-router/Cargo.toml index 0ce57ba27..9d24cbabc 100644 --- a/sgl-router/Cargo.toml +++ b/sgl-router/Cargo.toml @@ -69,6 +69,7 @@ rmcp = { version = "0.6.3", features = ["client", "server", "reqwest", "auth"] } serde_yaml = "0.9" +subtle = "2.6" # gRPC and Protobuf dependencies tonic = { version = "0.12", features = ["tls", "gzip", "transport"] } diff --git a/sgl-router/README.md b/sgl-router/README.md index af73536b3..72f94f034 100644 --- a/sgl-router/README.md +++ b/sgl-router/README.md @@ -331,6 +331,79 @@ python -m sglang_router.launch_router \ --prometheus-port 9090 ``` +### API Key Authentication + +The router supports multi-level API key authentication for both the router itself and individual workers: + +#### Router API Key +Protect access to the router endpoints: + +```bash +python -m sglang_router.launch_router \ + --api-key "your-router-api-key" \ + --worker-urls http://worker1:8000 http://worker2:8000 +``` + +When router API key is set, clients must include the Bearer token: +```bash +curl -H "Authorization: Bearer your-router-api-key" http://localhost:8080/v1/chat/completions +``` + +#### Worker API Keys +Workers can have their own API keys for authentication: + +```bash +# Workers specified in --worker-urls automatically inherit the router's API key +python -m sglang_router.launch_router \ + --api-key "shared-api-key" \ + --worker-urls http://worker1:8000 http://worker2:8000 +# Both workers will use "shared-api-key" for authentication + +# Adding workers dynamically WITHOUT inheriting router's key +curl -X POST http://localhost:8080/add_worker?url=http://worker3:8000 +# WARNING: This worker has NO API key even though router has one! + +# Adding workers with specific API keys dynamically +curl -X POST http://localhost:8080/add_worker?url=http://worker3:8000&api_key=worker3-specific-key +``` + +#### Security Configurations + +1. **No Authentication** (default): + - Router and workers accessible without keys + - Suitable for trusted environments + +2. **Router-only Authentication**: + - Clients need key to access router + - Router can access workers freely + +3. **Worker-only Authentication**: + - Router accessible without key + - Each worker requires authentication + ```bash + # Add workers with their API keys + curl -X POST http://localhost:8080/add_worker?url=http://worker:8000&api_key=worker-key + ``` + +4. **Full Authentication**: + - Router requires key from clients + - Each worker requires its own key + ```bash + # Start router with its key + python -m sglang_router.launch_router --api-key "router-key" + + # Add workers with their keys + curl -H "Authorization: Bearer router-key" \ + -X POST http://localhost:8080/add_worker?url=http://worker:8000&api_key=worker-key + ``` + +#### Important Notes + +- **Initial Workers**: Workers specified in `--worker-urls` automatically inherit the router's API key +- **Dynamic Workers**: When adding workers via API, you must explicitly specify their API keys - they do NOT inherit the router's key +- **Security Warning**: When adding workers without API keys while the router has one configured, a warning will be logged +- **Common Pitfall**: If router and workers use the same API key, you must still specify the key when adding workers dynamically + ### Command Line Arguments Reference #### Service Discovery @@ -349,6 +422,9 @@ python -m sglang_router.launch_router \ - `--prefill-policy`: Separate routing policy for prefill nodes (optional, overrides `--policy` for prefill) - `--decode-policy`: Separate routing policy for decode nodes (optional, overrides `--policy` for decode) +#### Authentication +- `--api-key`: API key for router authentication (clients must provide this as Bearer token) + ## Development ### Build Process diff --git a/sgl-router/py_test/e2e/test_regular_router.py b/sgl-router/py_test/e2e/test_regular_router.py index 1f2879684..d42f8e146 100644 --- a/sgl-router/py_test/e2e/test_regular_router.py +++ b/sgl-router/py_test/e2e/test_regular_router.py @@ -131,31 +131,29 @@ def test_dp_aware_worker_expansion_and_api_key( r = requests.post( f"{router_url}/add_worker", params={"url": worker_url, "api_key": api_key}, + headers={"Authorization": f"Bearer {api_key}"}, timeout=180, ) r.raise_for_status() - r = requests.get(f"{router_url}/list_workers", timeout=30) + r = requests.get( + f"{router_url}/list_workers", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=30, + ) r.raise_for_status() urls = r.json().get("urls", []) assert len(urls) == 2 assert set(urls) == {f"{worker_url}@0", f"{worker_url}@1"} - # TODO: Router currently doesn't enforce API key authentication on incoming requests. - # It only adds the API key to outgoing requests to workers. - # Need to implement auth middleware to properly protect router endpoints. - # For now, both requests succeed (200) regardless of client authentication. - - # Verify API key enforcement path-through - # 1) Without Authorization -> Currently 200 (should be 401 after auth middleware added) + # Verify API key enforcement + # 1) Without Authorization -> Should get 401 Unauthorized r = requests.post( f"{router_url}/v1/completions", json={"model": e2e_model, "prompt": "hi", "max_tokens": 1}, timeout=60, ) - assert ( - r.status_code == 200 - ) # TODO: Change to 401 after auth middleware implementation + assert r.status_code == 401 # 2) With correct Authorization -> 200 r = requests.post( diff --git a/sgl-router/src/middleware.rs b/sgl-router/src/middleware.rs index 567b1f695..90929e722 100644 --- a/sgl-router/src/middleware.rs +++ b/sgl-router/src/middleware.rs @@ -1,12 +1,13 @@ use axum::{ - extract::Request, extract::State, http::HeaderValue, http::StatusCode, middleware::Next, - response::IntoResponse, response::Response, + body::Body, extract::Request, extract::State, http::header, http::HeaderValue, + http::StatusCode, middleware::Next, response::IntoResponse, response::Response, }; use rand::Rng; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use std::time::Duration; use std::time::Instant; +use subtle::ConstantTimeEq; use tokio::sync::{mpsc, oneshot}; use tower::{Layer, Service}; use tower_http::trace::{MakeSpan, OnRequest, OnResponse, TraceLayer}; @@ -17,6 +18,49 @@ pub use crate::core::token_bucket::TokenBucket; use crate::metrics::RouterMetrics; use crate::server::AppState; +#[derive(Clone)] +pub struct AuthConfig { + pub api_key: Option, +} + +/// Middleware to validate Bearer token against configured API key +/// Only active when router has an API key configured +pub async fn auth_middleware( + State(auth_config): State, + request: Request, + next: Next, +) -> Result { + if let Some(expected_key) = &auth_config.api_key { + // Extract Authorization header + let auth_header = request + .headers() + .get(header::AUTHORIZATION) + .and_then(|h| h.to_str().ok()); + + match auth_header { + Some(header_value) if header_value.starts_with("Bearer ") => { + let token = &header_value[7..]; // Skip "Bearer " + // Use constant-time comparison to prevent timing attacks + let token_bytes = token.as_bytes(); + let expected_bytes = expected_key.as_bytes(); + + // Check if lengths match first (this is not constant-time but necessary) + if token_bytes.len() != expected_bytes.len() { + return Err(StatusCode::UNAUTHORIZED); + } + + // Constant-time comparison of the actual values + if token_bytes.ct_eq(expected_bytes).unwrap_u8() != 1 { + return Err(StatusCode::UNAUTHORIZED); + } + } + _ => return Err(StatusCode::UNAUTHORIZED), + } + } + + Ok(next.run(request).await) +} + /// Generate OpenAI-compatible request ID based on endpoint fn generate_request_id(path: &str) -> String { let prefix = if path.contains("/chat/completions") { diff --git a/sgl-router/src/server.rs b/sgl-router/src/server.rs index 8530666cb..ad9545c96 100644 --- a/sgl-router/src/server.rs +++ b/sgl-router/src/server.rs @@ -4,7 +4,7 @@ use crate::{ data_connector::{MemoryResponseStorage, NoOpResponseStorage, SharedResponseStorage}, logging::{self, LoggingConfig}, metrics::{self, PrometheusConfig}, - middleware::{self, QueuedRequest, TokenBucket}, + middleware::{self, AuthConfig, QueuedRequest, TokenBucket}, policies::PolicyRegistry, protocols::{ spec::{ @@ -275,6 +275,16 @@ async fn add_worker( State(state): State>, Query(AddWorkerQuery { url, api_key }): Query, ) -> Response { + // Warn if router has API key but worker is being added without one + if state.context.router_config.api_key.is_some() && api_key.is_none() { + warn!( + "Adding worker {} without API key while router has API key configured. \ + Worker will be accessible without authentication. \ + If the worker requires the same API key as the router, please specify it explicitly.", + url + ); + } + let result = WorkerManager::add_worker(&url, &api_key, &state.context).await; match result { @@ -312,6 +322,16 @@ async fn create_worker( State(state): State>, Json(config): Json, ) -> Response { + // Warn if router has API key but worker is being added without one + if state.context.router_config.api_key.is_some() && config.api_key.is_none() { + warn!( + "Adding worker {} without API key while router has API key configured. \ + Worker will be accessible without authentication. \ + If the worker requires the same API key as the router, please specify it explicitly.", + config.url + ); + } + let result = WorkerManager::add_worker_from_config(&config, &state.context).await; match result { @@ -423,6 +443,7 @@ pub struct ServerConfig { pub fn build_app( app_state: Arc, + auth_config: AuthConfig, max_payload_size: usize, request_id_headers: Vec, cors_allowed_origins: Vec, @@ -448,6 +469,10 @@ pub fn build_app( .route_layer(axum::middleware::from_fn_with_state( app_state.clone(), middleware::concurrency_limit_middleware, + )) + .route_layer(axum::middleware::from_fn_with_state( + auth_config.clone(), + middleware::auth_middleware, )); let public_routes = Router::new() @@ -464,13 +489,21 @@ pub fn build_app( .route("/remove_worker", post(remove_worker)) .route("/list_workers", get(list_workers)) .route("/flush_cache", post(flush_cache)) - .route("/get_loads", get(get_loads)); + .route("/get_loads", get(get_loads)) + .route_layer(axum::middleware::from_fn_with_state( + auth_config.clone(), + middleware::auth_middleware, + )); let worker_routes = Router::new() .route("/workers", post(create_worker)) .route("/workers", get(list_workers_rest)) .route("/workers/{url}", get(get_worker)) - .route("/workers/{url}", delete(delete_worker)); + .route("/workers/{url}", delete(delete_worker)) + .route_layer(axum::middleware::from_fn_with_state( + auth_config.clone(), + middleware::auth_middleware, + )); Router::new() .merge(protected_routes) @@ -629,8 +662,13 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box