From f4affd4df53c3eecbb19c2a80cce0b627285582e Mon Sep 17 00:00:00 2001 From: Simo Lin Date: Tue, 7 Oct 2025 13:39:33 -0400 Subject: [PATCH] [router] fix grpc connection conversion and add optimization (#11305) --- .../src/grpc_client/sglang_scheduler.rs | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sgl-router/src/grpc_client/sglang_scheduler.rs b/sgl-router/src/grpc_client/sglang_scheduler.rs index 6b5a5e8bb..ab94c4c07 100644 --- a/sgl-router/src/grpc_client/sglang_scheduler.rs +++ b/sgl-router/src/grpc_client/sglang_scheduler.rs @@ -1,7 +1,7 @@ use std::convert::TryFrom; use std::time::Duration; use tonic::{transport::Channel, Request}; -use tracing::{debug, warn}; +use tracing::debug; use crate::protocols::spec::{ ChatCompletionRequest, GenerateRequest, ResponseFormat, @@ -27,28 +27,23 @@ impl SglangSchedulerClient { pub async fn connect(endpoint: &str) -> Result> { debug!("Connecting to SGLang scheduler at {}", endpoint); - // Convert grpc:// to http:// for tonic, preserving IPv6 bracket notation - let http_endpoint = if endpoint.starts_with("grpc://") { - // Use proper URL parsing to preserve IPv6 brackets - match url::Url::parse(endpoint) { - Ok(mut parsed) => { - let _ = parsed.set_scheme("http"); - parsed.to_string() - } - Err(_) => { - warn!( - "Failed to parse gRPC endpoint '{}', using simple string replacement", - endpoint - ); - endpoint.replace("grpc://", "http://") - } - } + // Convert grpc:// to http:// for tonic + let http_endpoint = if let Some(addr) = endpoint.strip_prefix("grpc://") { + format!("http://{}", addr) } else { endpoint.to_string() }; let channel = Channel::from_shared(http_endpoint)? .timeout(Duration::from_secs(30)) + .http2_keep_alive_interval(Duration::from_secs(30)) + .keep_alive_timeout(Duration::from_secs(10)) + .keep_alive_while_idle(true) + .tcp_keepalive(Some(Duration::from_secs(60))) + .tcp_nodelay(true) + .http2_adaptive_window(true) + .initial_stream_window_size(Some(16 * 1024 * 1024)) // 16MB + .initial_connection_window_size(Some(32 * 1024 * 1024)) // 32MB .connect() .await?;