[router] add grpc client get and set (#10955)
This commit is contained in:
@@ -19,6 +19,8 @@ pub enum WorkerError {
|
||||
WorkerAtCapacity { url: String },
|
||||
/// Invalid URL format
|
||||
InvalidUrl { url: String },
|
||||
/// Connection failed
|
||||
ConnectionFailed { url: String, reason: String },
|
||||
}
|
||||
|
||||
impl fmt::Display for WorkerError {
|
||||
@@ -42,6 +44,9 @@ impl fmt::Display for WorkerError {
|
||||
WorkerError::InvalidUrl { url } => {
|
||||
write!(f, "Invalid URL format: {}", url)
|
||||
}
|
||||
WorkerError::ConnectionFailed { url, reason } => {
|
||||
write!(f, "Connection failed for worker {}: {}", url, reason)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -220,6 +220,16 @@ pub trait Worker: Send + Sync + fmt::Debug {
|
||||
.get("chat_template")
|
||||
.map(|s| s.as_str())
|
||||
}
|
||||
|
||||
/// Get the gRPC client handle for this worker.
///
/// NOTE(review): earlier wording said "get or create", but the `BasicWorker`
/// implementation in this change never creates a client on demand; it returns
/// `WorkerError::ConnectionFailed` when no client was stored. Confirm whether
/// lazy creation is still intended for other implementors.
|
||||
/// Returns `Ok(None)` for HTTP workers and `Ok(Some(client))` for gRPC workers.
/// The client is handed out as a shared `Arc<Mutex<_>>`, so callers must lock it
/// before issuing RPCs.
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<Mutex<SglangSchedulerClient>>>>;
|
||||
|
||||
/// Reset the gRPC client connection (intended for reconnection scenarios).
|
||||
/// The default implementation does nothing and returns `Ok(())`, which is the
/// correct behavior for HTTP workers. Implementors that own a gRPC client are
/// expected to override it.
|
||||
async fn reset_grpc_client(&self) -> WorkerResult<()> {
|
||||
// Default: nothing to reset.
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Connection mode for worker communication
|
||||
@@ -411,29 +421,44 @@ impl Worker for BasicWorker {
|
||||
}
|
||||
}
|
||||
ConnectionMode::Grpc { .. } => {
|
||||
if let Some(grpc_client) = &self.grpc_client {
|
||||
let mut client = grpc_client.lock().await;
|
||||
match client.health_check().await {
|
||||
Ok(response) => {
|
||||
tracing::debug!(
|
||||
"gRPC health check succeeded for {}: healthy={}",
|
||||
self.metadata.url,
|
||||
// Use the new get_grpc_client() method
|
||||
match self.get_grpc_client().await {
|
||||
Ok(Some(grpc_client)) => {
|
||||
let mut client = grpc_client.lock().await;
|
||||
match client.health_check().await {
|
||||
Ok(response) => {
|
||||
tracing::debug!(
|
||||
"gRPC health check succeeded for {}: healthy={}",
|
||||
self.metadata.url,
|
||||
response.healthy
|
||||
);
|
||||
response.healthy
|
||||
);
|
||||
response.healthy
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"gRPC health check RPC failed for {}: {:?}",
|
||||
self.metadata.url,
|
||||
e
|
||||
);
|
||||
false
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"gRPC health check RPC failed for {}: {:?}",
|
||||
self.metadata.url,
|
||||
e
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::error!("No gRPC client available for worker {}", self.metadata.url);
|
||||
false
|
||||
Ok(None) => {
|
||||
tracing::error!(
|
||||
"Worker {} is not a gRPC worker but has gRPC connection mode",
|
||||
self.metadata.url
|
||||
);
|
||||
false
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
"Failed to get gRPC client for worker {}: {:?}",
|
||||
self.metadata.url,
|
||||
e
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -502,6 +527,42 @@ impl Worker for BasicWorker {
|
||||
/// Returns a shared reference to the circuit breaker stored on this worker.
fn circuit_breaker(&self) -> &CircuitBreaker {
|
||||
&self.circuit_breaker
|
||||
}
|
||||
|
||||
/// Returns the gRPC client handle that was stored on this worker, if any.
///
/// # Returns
/// * `Ok(None)` when the worker's connection mode is `ConnectionMode::Http`.
/// * `Ok(Some(client))` when the mode is `ConnectionMode::Grpc` and a client is stored;
///   the returned value is a clone of the stored handle, not a new connection.
///
/// # Errors
/// Returns `WorkerError::ConnectionFailed` (carrying this worker's URL) when the mode is
/// `ConnectionMode::Grpc` but `self.grpc_client` is `None`. No connection attempt is made here.
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<Mutex<SglangSchedulerClient>>>> {
|
||||
match self.metadata.connection_mode {
|
||||
// HTTP workers have no gRPC client by definition.
ConnectionMode::Http => Ok(None),
|
||||
ConnectionMode::Grpc { .. } => {
|
||||
// A client is already stored: hand out another handle to it.
|
||||
if let Some(ref client) = self.grpc_client {
|
||||
return Ok(Some(client.clone()));
|
||||
}
|
||||
|
||||
// Lazy initialization is not possible through `&self` with the current field;
// it would require interior mutability for `grpc_client`.
|
||||
// Until then, a missing client is reported as an error; the client is expected
// to have been set when the worker was created.
|
||||
Err(WorkerError::ConnectionFailed {
|
||||
url: self.metadata.url.clone(),
|
||||
reason:
|
||||
"gRPC client not initialized. Client should be set during worker creation"
|
||||
.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Placeholder for resetting this worker's gRPC client.
///
/// Always returns `Ok(())`. For `ConnectionMode::Http` there is nothing to reset.
/// For `ConnectionMode::Grpc` the stored client is NOT replaced or reconnected:
/// a warning is logged and success is still reported, so callers cannot rely on
/// this to recover a broken connection yet.
async fn reset_grpc_client(&self) -> WorkerResult<()> {
|
||||
match self.metadata.connection_mode {
|
||||
ConnectionMode::Http => Ok(()),
|
||||
ConnectionMode::Grpc { .. } => {
|
||||
// The client cannot be replaced through `&self` with the current field type.
|
||||
// Doing so would require storing `grpc_client` behind something like RwLock or OnceCell,
|
||||
// which is deferred to a later change.
|
||||
tracing::warn!(
|
||||
"gRPC client reset not yet implemented - requires mutable client storage"
|
||||
);
|
||||
// NOTE(review): returns success even though nothing was reset.
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A DP-aware worker that handles data-parallel routing
|
||||
@@ -630,6 +691,14 @@ impl Worker for DPAwareWorker {
|
||||
/// Builds the URL for `route` by appending it directly to `self.base_url`.
///
/// No separator is inserted or normalized, so `route` must supply its own
/// leading `/` if one is needed.
fn endpoint_url(&self, route: &str) -> String {
|
||||
format!("{}{}", self.base_url, route)
|
||||
}
|
||||
|
||||
/// Delegates to the wrapped `base_worker`, returning whatever it returns
/// (including its error) unchanged.
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<Mutex<SglangSchedulerClient>>>> {
|
||||
self.base_worker.get_grpc_client().await
|
||||
}
|
||||
|
||||
/// Delegates the reset request to the wrapped `base_worker` and forwards its result.
async fn reset_grpc_client(&self) -> WorkerResult<()> {
|
||||
self.base_worker.reset_grpc_client().await
|
||||
}
|
||||
}
|
||||
|
||||
/// Worker factory for creating workers of different types
|
||||
|
||||
@@ -13,6 +13,7 @@ pub mod proto {
|
||||
// package sglang.grpc.scheduler; generates a nested module structure
|
||||
|
||||
/// gRPC client for the SGLang scheduler.
///
/// Wraps the `proto::sglang_scheduler_client::SglangSchedulerClient` type over a `Channel`.
/// `Clone` is derived so that a copy of the client can be taken out from behind a lock
/// (the router does `client_arc.lock().await.clone()`).
/// NOTE(review): presumably cloning shares the underlying channel rather than opening a
/// new connection; confirm against the transport library's documentation.
|
||||
#[derive(Clone)]
|
||||
pub struct SglangSchedulerClient {
|
||||
// The inner client from the `proto` module that performs the actual RPCs.
client: proto::sglang_scheduler_client::SglangSchedulerClient<Channel>,
|
||||
}
|
||||
|
||||
@@ -202,12 +202,23 @@ impl GrpcRouter {
|
||||
|
||||
debug!("Selected worker: {}", worker.url());
|
||||
|
||||
// Step 2: Get gRPC client for worker (fail fast if can't connect)
|
||||
// TODO(CahterineSue): manage grpc connection in worker. (it should be simpler here)
|
||||
let client = match self.get_or_create_grpc_client(worker.url()).await {
|
||||
Ok(c) => c,
|
||||
// Step 2: Get gRPC client from worker
|
||||
let client = match worker.get_grpc_client().await {
|
||||
Ok(Some(client_arc)) => {
|
||||
// Clone the client from inside the Arc<Mutex<>>
|
||||
let client = client_arc.lock().await.clone();
|
||||
client
|
||||
}
|
||||
Ok(None) => {
|
||||
error!("Selected worker is not a gRPC worker");
|
||||
return (
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"Selected worker is not configured for gRPC",
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to get gRPC client: {}", e);
|
||||
error!("Failed to get gRPC client from worker: {}", e);
|
||||
return (
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to get gRPC client: {}", e),
|
||||
@@ -552,18 +563,6 @@ impl GrpcRouter {
|
||||
None
|
||||
}
|
||||
|
||||
/// Connect a gRPC client to the worker at `worker_url`.
///
/// Despite the name, nothing is cached or reused here: every call invokes
/// `SglangSchedulerClient::connect`.
///
/// # Errors
/// Returns the connect error rendered as a `String`, prefixed with
/// "Failed to connect to gRPC server: ".
async fn get_or_create_grpc_client(
|
||||
&self,
|
||||
worker_url: &str,
|
||||
) -> Result<SglangSchedulerClient, String> {
|
||||
// TODO: move connection management into the worker
|
||||
debug!("Creating new gRPC client for worker: {}", worker_url);
|
||||
SglangSchedulerClient::connect(worker_url)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to connect to gRPC server: {}", e))
|
||||
}
|
||||
|
||||
/// Placeholder for streaming handler (to be implemented in Phase 2)
|
||||
async fn handle_streaming_chat(
|
||||
&self,
|
||||
|
||||
Reference in New Issue
Block a user