[router][grpc] Consolidate parser checks for chat completions (#11439)
This commit is contained in:
@@ -861,6 +861,44 @@ impl ResponseProcessingStage {
|
||||
let chat_request = ctx.chat_request_arc();
|
||||
let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request);
|
||||
|
||||
// Check parser availability once upfront (not per choice)
|
||||
let reasoning_parser_available = chat_request.separate_reasoning
|
||||
&& utils::check_reasoning_parser_availability(
|
||||
&self.processor.reasoning_parser_factory,
|
||||
self.processor.configured_reasoning_parser.as_ref(),
|
||||
&chat_request.model,
|
||||
);
|
||||
|
||||
let tool_choice_enabled = !matches!(
|
||||
&chat_request.tool_choice,
|
||||
Some(crate::protocols::spec::ToolChoice::Value(
|
||||
crate::protocols::spec::ToolChoiceValue::None
|
||||
))
|
||||
);
|
||||
|
||||
let tool_parser_available = tool_choice_enabled
|
||||
&& chat_request.tools.is_some()
|
||||
&& utils::check_tool_parser_availability(
|
||||
&self.processor.tool_parser_factory,
|
||||
self.processor.configured_tool_parser.as_ref(),
|
||||
&chat_request.model,
|
||||
);
|
||||
|
||||
// Log once per request (not per choice)
|
||||
if chat_request.separate_reasoning && !reasoning_parser_available {
|
||||
debug!(
|
||||
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
||||
chat_request.model
|
||||
);
|
||||
}
|
||||
|
||||
if chat_request.tools.is_some() && tool_choice_enabled && !tool_parser_available {
|
||||
debug!(
|
||||
"No tool parser found for model '{}', skipping tool call parsing",
|
||||
chat_request.model
|
||||
);
|
||||
}
|
||||
|
||||
let stop_decoder = ctx
|
||||
.state
|
||||
.response
|
||||
@@ -878,6 +916,8 @@ impl ResponseProcessingStage {
|
||||
&chat_request,
|
||||
stop_decoder,
|
||||
history_tool_calls_count,
|
||||
reasoning_parser_available,
|
||||
tool_parser_available,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -30,8 +30,8 @@ pub struct ResponseProcessor {
|
||||
pub tokenizer: Arc<dyn Tokenizer>,
|
||||
pub tool_parser_factory: ToolParserFactory,
|
||||
pub reasoning_parser_factory: ReasoningParserFactory,
|
||||
configured_tool_parser: Option<String>,
|
||||
configured_reasoning_parser: Option<String>,
|
||||
pub configured_tool_parser: Option<String>,
|
||||
pub configured_reasoning_parser: Option<String>,
|
||||
}
|
||||
|
||||
impl ResponseProcessor {
|
||||
@@ -52,6 +52,7 @@ impl ResponseProcessor {
|
||||
}
|
||||
|
||||
/// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn process_single_choice(
|
||||
&self,
|
||||
complete: &proto::GenerateComplete,
|
||||
@@ -59,6 +60,8 @@ impl ResponseProcessor {
|
||||
original_request: &ChatCompletionRequest,
|
||||
stop_decoder: &mut StopSequenceDecoder,
|
||||
history_tool_calls_count: usize,
|
||||
reasoning_parser_available: bool,
|
||||
tool_parser_available: bool,
|
||||
) -> Result<ChatChoice, String> {
|
||||
stop_decoder.reset();
|
||||
// Decode tokens
|
||||
@@ -89,8 +92,8 @@ impl ResponseProcessor {
|
||||
let mut reasoning_text: Option<String> = None;
|
||||
let mut processed_text = final_text;
|
||||
|
||||
// Check if reasoning parsing is enabled and separate_reasoning is requested
|
||||
if original_request.separate_reasoning {
|
||||
// Check if reasoning parsing is enabled and parser is available
|
||||
if original_request.separate_reasoning && reasoning_parser_available {
|
||||
let pooled_parser = utils::get_reasoning_parser(
|
||||
&self.reasoning_parser_factory,
|
||||
self.configured_reasoning_parser.as_ref(),
|
||||
@@ -113,8 +116,6 @@ impl ResponseProcessor {
|
||||
|
||||
// Step 2: Handle tool call parsing
|
||||
let mut tool_calls: Option<Vec<ToolCall>> = None;
|
||||
|
||||
// Check if tool calls should be processed
|
||||
let tool_choice_enabled = !matches!(
|
||||
&original_request.tool_choice,
|
||||
Some(ToolChoice::Value(ToolChoiceValue::None))
|
||||
@@ -134,7 +135,7 @@ impl ResponseProcessor {
|
||||
&processed_text,
|
||||
&original_request.tool_choice,
|
||||
);
|
||||
} else {
|
||||
} else if tool_parser_available {
|
||||
(tool_calls, processed_text) = self
|
||||
.parse_tool_calls(
|
||||
&processed_text,
|
||||
|
||||
@@ -195,41 +195,29 @@ impl StreamingProcessor {
|
||||
let system_fingerprint = dispatch.weight_version.as_deref();
|
||||
|
||||
// Check parser availability once upfront (log warning only once per request)
|
||||
let reasoning_parser_available = if separate_reasoning {
|
||||
if let Some(parser_name) = self.configured_reasoning_parser.as_ref() {
|
||||
self.reasoning_parser_factory
|
||||
.registry()
|
||||
.has_parser(parser_name)
|
||||
} else {
|
||||
self.reasoning_parser_factory
|
||||
.registry()
|
||||
.has_parser_for_model(model)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
let reasoning_parser_available = separate_reasoning
|
||||
&& utils::check_reasoning_parser_availability(
|
||||
&self.reasoning_parser_factory,
|
||||
self.configured_reasoning_parser.as_ref(),
|
||||
model,
|
||||
);
|
||||
|
||||
let tool_parser_available = if tools.is_some() {
|
||||
if let Some(parser_name) = self.configured_tool_parser.as_ref() {
|
||||
self.tool_parser_factory.registry().has_parser(parser_name)
|
||||
} else {
|
||||
self.tool_parser_factory
|
||||
.registry()
|
||||
.has_parser_for_model(model)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
let tool_parser_available = tools.is_some()
|
||||
&& utils::check_tool_parser_availability(
|
||||
&self.tool_parser_factory,
|
||||
self.configured_tool_parser.as_ref(),
|
||||
model,
|
||||
);
|
||||
|
||||
if separate_reasoning && !reasoning_parser_available {
|
||||
warn!(
|
||||
debug!(
|
||||
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
||||
model
|
||||
);
|
||||
}
|
||||
|
||||
if tools.is_some() && !tool_parser_available {
|
||||
warn!(
|
||||
debug!(
|
||||
"No tool parser found for model '{}', skipping tool call parsing",
|
||||
model
|
||||
);
|
||||
|
||||
@@ -675,6 +675,34 @@ pub fn generate_tool_call_id(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a reasoning parser is available for the given model
|
||||
pub fn check_reasoning_parser_availability(
|
||||
reasoning_parser_factory: &crate::reasoning_parser::ParserFactory,
|
||||
configured_parser: Option<&String>,
|
||||
model: &str,
|
||||
) -> bool {
|
||||
if let Some(parser_name) = configured_parser {
|
||||
reasoning_parser_factory.registry().has_parser(parser_name)
|
||||
} else {
|
||||
reasoning_parser_factory
|
||||
.registry()
|
||||
.has_parser_for_model(model)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a tool parser is available for the given model
|
||||
pub fn check_tool_parser_availability(
|
||||
tool_parser_factory: &crate::tool_parser::ParserFactory,
|
||||
configured_parser: Option<&String>,
|
||||
model: &str,
|
||||
) -> bool {
|
||||
if let Some(parser_name) = configured_parser {
|
||||
tool_parser_factory.registry().has_parser(parser_name)
|
||||
} else {
|
||||
tool_parser_factory.registry().has_parser_for_model(model)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the appropriate reasoning parser for a model
|
||||
///
|
||||
/// If a parser name is explicitly configured, use that parser.
|
||||
|
||||
@@ -6,7 +6,7 @@ use tokio::sync::Mutex;
|
||||
|
||||
use crate::tool_parser::parsers::{
|
||||
DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
|
||||
LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
|
||||
LlamaParser, MistralParser, PassthroughParser, PythonicParser, QwenParser, Step3Parser,
|
||||
};
|
||||
use crate::tool_parser::traits::ToolParser;
|
||||
|
||||
@@ -36,7 +36,7 @@ impl ParserRegistry {
|
||||
creators: Arc::new(RwLock::new(HashMap::new())),
|
||||
pool: Arc::new(RwLock::new(HashMap::new())),
|
||||
model_mapping: Arc::new(RwLock::new(HashMap::new())),
|
||||
default_parser: Arc::new(RwLock::new("json".to_string())),
|
||||
default_parser: Arc::new(RwLock::new("passthrough".to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,10 +124,9 @@ impl ParserRegistry {
|
||||
}
|
||||
}
|
||||
|
||||
// Check if default parser exists
|
||||
let default = self.default_parser.read().unwrap().clone();
|
||||
let creators = self.creators.read().unwrap();
|
||||
creators.contains_key(&default)
|
||||
// Return false if no specific parser found for this model
|
||||
// (get_pooled will still fall back to default parser)
|
||||
false
|
||||
}
|
||||
|
||||
/// Create a fresh (non-pooled) parser instance for a specific model.
|
||||
@@ -228,6 +227,7 @@ impl ParserFactory {
|
||||
let registry = ParserRegistry::new();
|
||||
|
||||
// Register default parsers
|
||||
registry.register_parser("passthrough", || Box::new(PassthroughParser::new()));
|
||||
registry.register_parser("json", || Box::new(JsonParser::new()));
|
||||
registry.register_parser("mistral", || Box::new(MistralParser::new()));
|
||||
registry.register_parser("qwen", || Box::new(QwenParser::new()));
|
||||
@@ -311,15 +311,15 @@ impl ParserFactory {
|
||||
|
||||
/// Get a pooled parser for the given model ID.
|
||||
/// Returns a shared instance that can be used concurrently.
|
||||
/// Falls back to JSON parser if model is not recognized.
|
||||
/// Falls back to passthrough parser if model is not recognized.
|
||||
pub fn get_pooled(&self, model_id: &str) -> PooledParser {
|
||||
self.registry
|
||||
.get_pooled_for_model(model_id)
|
||||
.unwrap_or_else(|| {
|
||||
// Fallback to JSON parser
|
||||
// Fallback to passthrough parser (no-op, returns text unchanged)
|
||||
self.registry
|
||||
.get_pooled_parser("json")
|
||||
.expect("JSON parser should always be registered")
|
||||
.get_pooled_parser("passthrough")
|
||||
.expect("Passthrough parser should always be registered")
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ pub mod json_parser;
|
||||
pub mod kimik2_parser;
|
||||
pub mod llama_parser;
|
||||
pub mod mistral_parser;
|
||||
pub mod passthrough_parser;
|
||||
pub mod pythonic_parser;
|
||||
pub mod qwen_parser;
|
||||
pub mod step3_parser;
|
||||
@@ -27,6 +28,7 @@ pub use json_parser::JsonParser;
|
||||
pub use kimik2_parser::KimiK2Parser;
|
||||
pub use llama_parser::LlamaParser;
|
||||
pub use mistral_parser::MistralParser;
|
||||
pub use passthrough_parser::PassthroughParser;
|
||||
pub use pythonic_parser::PythonicParser;
|
||||
pub use qwen_parser::QwenParser;
|
||||
pub use step3_parser::Step3Parser;
|
||||
|
||||
50
sgl-router/src/tool_parser/parsers/passthrough_parser.rs
Normal file
50
sgl-router/src/tool_parser/parsers/passthrough_parser.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
//! Passthrough parser that returns text unchanged
|
||||
//!
|
||||
//! This parser is used as a fallback for unknown models where no specific
|
||||
//! tool call parsing should be performed. It simply returns the input text
|
||||
//! with no tool calls detected.
|
||||
|
||||
use crate::protocols::spec::Tool;
|
||||
use crate::tool_parser::errors::ParserResult;
|
||||
use crate::tool_parser::traits::ToolParser;
|
||||
use crate::tool_parser::types::{StreamingParseResult, ToolCall, ToolCallItem};
|
||||
use async_trait::async_trait;
|
||||
|
||||
/// Tool parser that performs no parsing: text is returned unchanged and no
/// tool calls are ever reported.
///
/// This is a stateless unit struct. Per the module documentation it serves as
/// the fallback for models that have no dedicated tool parser.
///
/// `Debug` and `Clone` are derived alongside `Default` so the type can be
/// logged and duplicated like any other public value type.
#[derive(Debug, Clone, Default)]
pub struct PassthroughParser;

impl PassthroughParser {
    /// Create a new passthrough parser.
    ///
    /// Equivalent to [`PassthroughParser::default`]; the type carries no state.
    pub fn new() -> Self {
        Self
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolParser for PassthroughParser {
|
||||
async fn parse_complete(&self, output: &str) -> ParserResult<(String, Vec<ToolCall>)> {
|
||||
// Return text unchanged with no tool calls
|
||||
Ok((output.to_string(), vec![]))
|
||||
}
|
||||
|
||||
async fn parse_incremental(
|
||||
&mut self,
|
||||
chunk: &str,
|
||||
_tools: &[Tool],
|
||||
) -> ParserResult<StreamingParseResult> {
|
||||
// Return chunk unchanged with no tool calls
|
||||
Ok(StreamingParseResult {
|
||||
normal_text: chunk.to_string(),
|
||||
calls: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
fn has_tool_markers(&self, _text: &str) -> bool {
|
||||
// Passthrough never detects tool calls
|
||||
false
|
||||
}
|
||||
|
||||
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
|
||||
None
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user