// Source: sglang/sgl-router/src/routers/grpc/processing.rs (267 lines, 9.5 KiB, Rust)
//! Shared response processing logic for gRPC routers
//!
//! This module contains response processing functions that are shared between
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
use std::sync::Arc;
use serde_json::Value;
use tracing::error;
use crate::grpc_client::proto;
use crate::protocols::spec::{
ChatChoice, ChatCompletionMessage, ChatCompletionRequest, FunctionCallResponse, ToolCall,
ToolChoice, ToolChoiceValue,
};
use crate::reasoning_parser::ReasoningParserFactory;
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ToolParserFactory;
use super::utils;
// ============================================================================
// Response Processor - Main Entry Point
// ============================================================================
/// Unified response processor for both routers
///
/// Bundles the tokenizer and the parser factories shared by the regular
/// router and the PD router so that response post-processing (stop-sequence
/// decoding, reasoning separation, tool-call extraction) lives in one place.
#[derive(Clone)]
pub struct ResponseProcessor {
// Tokenizer handle; used when converting proto logprobs to OpenAI format.
pub tokenizer: Arc<dyn Tokenizer>,
// Factory/pool of tool-call parsers, looked up via `utils::get_tool_parser`.
pub tool_parser_factory: ToolParserFactory,
// Factory/pool of reasoning parsers, looked up via `utils::get_reasoning_parser`.
pub reasoning_parser_factory: ReasoningParserFactory,
// Optional parser-name override from router configuration; passed alongside
// the model name to `utils::get_tool_parser` (presumably `None` means
// "select per model" — confirm against `utils`).
configured_tool_parser: Option<String>,
// Optional reasoning-parser override, used the same way via
// `utils::get_reasoning_parser`.
configured_reasoning_parser: Option<String>,
}
impl ResponseProcessor {
pub fn new(
tokenizer: Arc<dyn Tokenizer>,
tool_parser_factory: ToolParserFactory,
reasoning_parser_factory: ReasoningParserFactory,
configured_tool_parser: Option<String>,
configured_reasoning_parser: Option<String>,
) -> Self {
Self {
tokenizer,
tool_parser_factory,
reasoning_parser_factory,
configured_tool_parser,
configured_reasoning_parser,
}
}
/// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
pub async fn process_single_choice(
&self,
complete: &proto::GenerateComplete,
index: usize,
original_request: &ChatCompletionRequest,
stop_decoder: &mut StopSequenceDecoder,
history_tool_calls_count: usize,
) -> Result<ChatChoice, String> {
stop_decoder.reset();
// Decode tokens
let outputs = stop_decoder
.process_tokens(&complete.output_ids)
.map_err(|e| format!("Failed to process tokens: {}", e))?;
// Accumulate text with early breaks
let mut final_text = String::new();
for output in outputs {
match output {
SequenceDecoderOutput::Text(t) => final_text.push_str(&t),
SequenceDecoderOutput::StoppedWithText(t) => {
final_text.push_str(&t);
break;
}
SequenceDecoderOutput::Stopped => break,
SequenceDecoderOutput::Held => {}
}
}
// Flush remaining text
if let SequenceDecoderOutput::Text(t) = stop_decoder.flush() {
final_text.push_str(&t);
}
// Step 1: Handle reasoning content parsing
let mut reasoning_text: Option<String> = None;
let mut processed_text = final_text;
// Check if reasoning parsing is enabled and separate_reasoning is requested
if original_request.separate_reasoning {
let pooled_parser = utils::get_reasoning_parser(
&self.reasoning_parser_factory,
self.configured_reasoning_parser.as_ref(),
&original_request.model,
);
let mut parser = pooled_parser.lock().await;
match parser.detect_and_parse_reasoning(&processed_text) {
Ok(result) => {
if !result.reasoning_text.is_empty() {
reasoning_text = Some(result.reasoning_text);
}
processed_text = result.normal_text;
}
Err(e) => {
return Err(format!("Reasoning parsing error: {}", e));
}
}
}
// Step 2: Handle tool call parsing
let mut tool_calls: Option<Vec<ToolCall>> = None;
// Check if tool calls should be processed
let tool_choice_enabled = !matches!(
&original_request.tool_choice,
Some(ToolChoice::Value(ToolChoiceValue::None))
);
if tool_choice_enabled && original_request.tools.is_some() {
// Check if JSON schema constraint was used (specific function or required mode)
let used_json_schema = match &original_request.tool_choice {
Some(ToolChoice::Function { .. }) => true,
Some(ToolChoice::Value(ToolChoiceValue::Required)) => true,
Some(ToolChoice::AllowedTools { mode, .. }) => mode == "required",
_ => false,
};
if used_json_schema {
(tool_calls, processed_text) = utils::parse_json_schema_response(
&processed_text,
&original_request.tool_choice,
);
} else {
(tool_calls, processed_text) = self
.parse_tool_calls(
&processed_text,
&original_request.model,
history_tool_calls_count,
)
.await;
}
}
// Step 3: Use finish reason directly from proto (already OpenAI-compatible string)
let finish_reason_str = &complete.finish_reason;
// Override finish reason if we have tool calls
let final_finish_reason_str = if tool_calls.is_some() {
"tool_calls"
} else {
finish_reason_str
};
// Extract matched_stop information from proto
let matched_stop = match &complete.matched_stop {
Some(proto::generate_complete::MatchedStop::MatchedTokenId(token_id)) => {
Some(Value::Number(serde_json::Number::from(*token_id)))
}
Some(proto::generate_complete::MatchedStop::MatchedStopStr(stop_str)) => {
Some(Value::String(stop_str.clone()))
}
None => None,
};
// Step 4: Convert output logprobs if present
let logprobs = if let Some(proto_logprobs) = &complete.output_logprobs {
match utils::convert_proto_to_openai_logprobs(proto_logprobs, &self.tokenizer) {
Ok(logprobs) => Some(logprobs),
Err(e) => {
error!("Failed to convert logprobs: {}", e);
None
}
}
} else {
None
};
// Step 5: Build ChatCompletionMessage (proper response message type)
let chat_message = ChatCompletionMessage {
role: "assistant".to_string(),
content: if processed_text.is_empty() {
None
} else {
Some(processed_text)
},
tool_calls,
reasoning_content: reasoning_text,
};
// Step 6: Build ChatChoice
let choice = ChatChoice {
index: index as u32,
message: chat_message,
logprobs,
finish_reason: Some(final_finish_reason_str.to_string()),
matched_stop,
hidden_states: None,
};
Ok(choice)
}
/// Parse tool calls using model-specific parser (EXACT COPY from router.rs:296-361)
pub async fn parse_tool_calls(
&self,
processed_text: &str,
model: &str,
history_tool_calls_count: usize,
) -> (Option<Vec<ToolCall>>, String) {
// Get pooled parser for this model
let pooled_parser = utils::get_tool_parser(
&self.tool_parser_factory,
self.configured_tool_parser.as_ref(),
model,
);
// Try parsing directly (parser will handle detection internally)
let result = {
let parser = pooled_parser.lock().await;
parser.parse_complete(processed_text).await
// Lock is dropped here
};
match result {
Ok((normal_text, parsed_tool_calls)) => {
if parsed_tool_calls.is_empty() {
return (None, normal_text);
}
let spec_tool_calls = parsed_tool_calls
.into_iter()
.enumerate()
.map(|(index, tc)| {
// Generate ID for this tool call
let id = utils::generate_tool_call_id(
model,
&tc.function.name,
index,
history_tool_calls_count,
);
ToolCall {
id,
tool_type: "function".to_string(),
function: FunctionCallResponse {
name: tc.function.name,
arguments: Some(
serde_json::to_string(&tc.function.arguments)
.unwrap_or_else(|_| "{}".to_string()),
),
},
}
})
.collect();
(Some(spec_tool_calls), normal_text)
}
Err(e) => {
error!("Tool call parsing error: {}", e);
(None, processed_text.to_string())
}
}
}
}