[router][grpc] Add helper functions for decoder in router.rs and fix specs (#10971)

This commit is contained in:
Chang Su
2025-09-26 17:10:45 -07:00
committed by GitHub
parent 2bdaf482f9
commit 0c3db88978
6 changed files with 309 additions and 108 deletions

View File

@@ -20,7 +20,7 @@ pub struct SglangSchedulerClient {
impl SglangSchedulerClient {
/// Create a new client and connect to the scheduler
pub async fn connect(endpoint: &str) -> Result<Self, Box<dyn std::error::Error>> {
pub async fn connect(endpoint: &str) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
debug!("Connecting to SGLang scheduler at {}", endpoint);
// Convert grpc:// to http:// for tonic
@@ -41,10 +41,11 @@ impl SglangSchedulerClient {
}
/// Submit a generation request (returns streaming response)
pub async fn generate_stream(
pub async fn generate(
&mut self,
req: proto::GenerateRequest,
) -> Result<tonic::Streaming<proto::GenerateResponse>, Box<dyn std::error::Error>> {
) -> Result<tonic::Streaming<proto::GenerateResponse>, Box<dyn std::error::Error + Send + Sync>>
{
let request = Request::new(req);
let response = self.client.generate(request).await?;
Ok(response.into_inner())
@@ -53,7 +54,7 @@ impl SglangSchedulerClient {
/// Perform health check
pub async fn health_check(
&mut self,
) -> Result<proto::HealthCheckResponse, Box<dyn std::error::Error>> {
) -> Result<proto::HealthCheckResponse, Box<dyn std::error::Error + Send + Sync>> {
debug!("Sending health check request");
let request = Request::new(proto::HealthCheckRequest {
tokenized: Some(proto::TokenizedInput {
@@ -72,7 +73,7 @@ impl SglangSchedulerClient {
&mut self,
request_id: String,
reason: String,
) -> Result<(), Box<dyn std::error::Error>> {
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let request = Request::new(proto::AbortRequest { request_id, reason });
self.client.abort(request).await?;
@@ -85,7 +86,7 @@ impl SglangSchedulerClient {
request_id: String,
body: &ChatCompletionRequest,
processed_text: String,
token_ids: Vec<i32>,
token_ids: Vec<u32>,
multimodal_inputs: Option<proto::MultimodalInputs>,
tool_call_constraint: Option<(String, String)>, // (constraint_type, constraint_value)
) -> Result<proto::GenerateRequest, String> {
@@ -153,6 +154,8 @@ impl SglangSchedulerClient {
stop: stop_sequences,
stop_token_ids: request.stop_token_ids.clone().unwrap_or_default(),
skip_special_tokens,
ignore_eos: request.ignore_eos,
no_stop_trim: request.no_stop_trim,
n: request.n.unwrap_or(1) as i32,
constraint: self.build_constraint(request, tool_call_constraint)?,
..Default::default()