NOTE(review): line breaks, indentation, and the generic arguments of the
`Result<...>` return types were lost in extraction. They are reconstructed here
from the hunk line counts and the surrounding code (the added `Streaming` import,
the `proto::*Request` names); confirm against the repository before applying.

diff --git a/sgl-router/src/core/worker.rs b/sgl-router/src/core/worker.rs
index 62ee37a5d..5df2a0229 100644
--- a/sgl-router/src/core/worker.rs
+++ b/sgl-router/src/core/worker.rs
@@ -554,7 +554,7 @@ impl Worker for BasicWorker {
                     return Ok(false);
                 };
 
-                let mut client = grpc_client.lock().await;
+                let client = grpc_client.lock().await;
                 match time::timeout(timeout, client.health_check()).await {
                     Ok(Ok(resp)) => {
                         tracing::debug!(
diff --git a/sgl-router/src/grpc_client/sglang_scheduler.rs b/sgl-router/src/grpc_client/sglang_scheduler.rs
index a9d9a11f1..799db14a4 100644
--- a/sgl-router/src/grpc_client/sglang_scheduler.rs
+++ b/sgl-router/src/grpc_client/sglang_scheduler.rs
@@ -1,6 +1,6 @@
 use std::convert::TryFrom;
 use std::time::Duration;
-use tonic::{transport::Channel, Request};
+use tonic::{transport::Channel, Request, Streaming};
 use tracing::debug;
 
 use crate::protocols::spec::{
@@ -54,18 +54,18 @@ impl SglangSchedulerClient {
 
     /// Submit a generation request (returns streaming response)
     pub async fn generate(
-        &mut self,
+        &self,
         req: proto::GenerateRequest,
-    ) -> Result<tonic::Streaming<proto::GenerateResponse>, Box<dyn std::error::Error + Send + Sync>>
-    {
+    ) -> Result<Streaming<proto::GenerateResponse>, Box<dyn std::error::Error + Send + Sync>> {
+        let mut client = self.client.clone();
         let request = Request::new(req);
-        let response = self.client.generate(request).await?;
+        let response = client.generate(request).await?;
         Ok(response.into_inner())
     }
 
     /// Perform health check
     pub async fn health_check(
-        &mut self,
+        &self,
     ) -> Result<proto::HealthCheckResponse, Box<dyn std::error::Error + Send + Sync>> {
         debug!("Sending health check request");
         let request = Request::new(proto::HealthCheckRequest {
@@ -75,43 +75,47 @@ impl SglangSchedulerClient {
             }),
         });
 
-        let response = self.client.health_check(request).await?;
+        let mut client = self.client.clone();
+        let response = client.health_check(request).await?;
         debug!("Health check response received");
         Ok(response.into_inner())
     }
 
     /// Abort a request
     pub async fn abort_request(
-        &mut self,
+        &self,
         request_id: String,
         reason: String,
     ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         let request = Request::new(proto::AbortRequest { request_id, reason });
 
-        self.client.abort(request).await?;
+        let mut client = self.client.clone();
+        client.abort(request).await?;
         Ok(())
     }
 
     /// Get model information
     pub async fn get_model_info(
-        &mut self,
+        &self,
     ) -> Result<proto::GetModelInfoResponse, Box<dyn std::error::Error + Send + Sync>> {
         debug!("Requesting model info");
         let request = Request::new(proto::GetModelInfoRequest {});
 
-        let response = self.client.get_model_info(request).await?;
+        let mut client = self.client.clone();
+        let response = client.get_model_info(request).await?;
         debug!("Model info response received");
         Ok(response.into_inner())
     }
 
     /// Get server information
     pub async fn get_server_info(
-        &mut self,
+        &self,
     ) -> Result<proto::GetServerInfoResponse, Box<dyn std::error::Error + Send + Sync>> {
         debug!("Requesting server info");
         let request = Request::new(proto::GetServerInfoRequest {});
 
-        let response = self.client.get_server_info(request).await?;
+        let mut client = self.client.clone();
+        let response = client.get_server_info(request).await?;
         debug!("Server info response received");
         Ok(response.into_inner())
     }