[router][grpc] Support parallel queue puts in grpc_request_manager and remove mutex for grpc_client (#11798)
This commit is contained in:
@@ -10,10 +10,7 @@ use std::{
|
||||
use async_trait::async_trait;
|
||||
use futures;
|
||||
use serde_json;
|
||||
use tokio::{
|
||||
sync::{Mutex, RwLock},
|
||||
time,
|
||||
};
|
||||
use tokio::{sync::RwLock, time};
|
||||
|
||||
use super::{CircuitBreaker, WorkerError, WorkerResult};
|
||||
use crate::{
|
||||
@@ -232,7 +229,7 @@ pub trait Worker: Send + Sync + fmt::Debug {
|
||||
|
||||
/// Get or create a gRPC client for this worker
|
||||
/// Returns None for HTTP workers, Some(client) for gRPC workers
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<Mutex<SglangSchedulerClient>>>>;
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<SglangSchedulerClient>>>;
|
||||
|
||||
/// Reset the gRPC client connection (for reconnection scenarios)
|
||||
/// No-op for HTTP workers
|
||||
@@ -367,7 +364,7 @@ pub struct BasicWorker {
|
||||
pub consecutive_successes: Arc<AtomicUsize>,
|
||||
pub circuit_breaker: CircuitBreaker,
|
||||
/// Lazily initialized gRPC client for gRPC workers
|
||||
pub grpc_client: Arc<RwLock<Option<Arc<Mutex<SglangSchedulerClient>>>>>,
|
||||
pub grpc_client: Arc<RwLock<Option<Arc<SglangSchedulerClient>>>>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for BasicWorker {
|
||||
@@ -505,7 +502,7 @@ impl Worker for BasicWorker {
|
||||
&self.circuit_breaker
|
||||
}
|
||||
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<Mutex<SglangSchedulerClient>>>> {
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<SglangSchedulerClient>>> {
|
||||
match self.metadata.connection_mode {
|
||||
ConnectionMode::Http => Ok(None),
|
||||
ConnectionMode::Grpc { .. } => {
|
||||
@@ -528,7 +525,7 @@ impl Worker for BasicWorker {
|
||||
);
|
||||
match SglangSchedulerClient::connect(&self.metadata.url).await {
|
||||
Ok(client) => {
|
||||
let client_arc = Arc::new(Mutex::new(client));
|
||||
let client_arc = Arc::new(client);
|
||||
*client_guard = Some(client_arc.clone());
|
||||
tracing::info!(
|
||||
"Successfully connected gRPC client for worker: {}",
|
||||
@@ -577,8 +574,7 @@ impl Worker for BasicWorker {
|
||||
return Ok(false);
|
||||
};
|
||||
|
||||
let client = grpc_client.lock().await;
|
||||
match time::timeout(timeout, client.health_check()).await {
|
||||
match time::timeout(timeout, grpc_client.health_check()).await {
|
||||
Ok(Ok(resp)) => {
|
||||
tracing::debug!(
|
||||
"gRPC health OK for {}: healthy={}",
|
||||
@@ -749,7 +745,7 @@ impl Worker for DPAwareWorker {
|
||||
format!("{}{}", self.base_url, route)
|
||||
}
|
||||
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<Mutex<SglangSchedulerClient>>>> {
|
||||
async fn get_grpc_client(&self) -> WorkerResult<Option<Arc<SglangSchedulerClient>>> {
|
||||
self.base_worker.get_grpc_client().await
|
||||
}
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ impl BasicWorkerBuilder {
|
||||
Arc,
|
||||
};
|
||||
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
let bootstrap_host = match url::Url::parse(&self.url) {
|
||||
Ok(parsed) => parsed.host_str().unwrap_or("localhost").to_string(),
|
||||
@@ -145,9 +145,7 @@ impl BasicWorkerBuilder {
|
||||
bootstrap_port,
|
||||
};
|
||||
|
||||
let grpc_client = Arc::new(RwLock::new(
|
||||
self.grpc_client.map(|client| Arc::new(Mutex::new(client))),
|
||||
));
|
||||
let grpc_client = Arc::new(RwLock::new(self.grpc_client.map(Arc::new)));
|
||||
|
||||
BasicWorker {
|
||||
metadata,
|
||||
|
||||
@@ -42,8 +42,7 @@ pub async fn get_grpc_client_from_worker(
|
||||
.map_err(|e| internal_error_message(format!("Failed to get gRPC client: {}", e)))?
|
||||
.ok_or_else(|| internal_error_static("Selected worker is not configured for gRPC"))?;
|
||||
|
||||
let client = client_arc.lock().await.clone();
|
||||
Ok(client)
|
||||
Ok((*client_arc).clone())
|
||||
}
|
||||
|
||||
/// Process tool call arguments in messages
|
||||
|
||||
Reference in New Issue
Block a user