[router][grpc] Consolidate parser checks for chat completions (#11439)
This commit is contained in:
@@ -861,6 +861,44 @@ impl ResponseProcessingStage {
|
||||
let chat_request = ctx.chat_request_arc();
|
||||
let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request);
|
||||
|
||||
// Check parser availability once upfront (not per choice)
|
||||
let reasoning_parser_available = chat_request.separate_reasoning
|
||||
&& utils::check_reasoning_parser_availability(
|
||||
&self.processor.reasoning_parser_factory,
|
||||
self.processor.configured_reasoning_parser.as_ref(),
|
||||
&chat_request.model,
|
||||
);
|
||||
|
||||
let tool_choice_enabled = !matches!(
|
||||
&chat_request.tool_choice,
|
||||
Some(crate::protocols::spec::ToolChoice::Value(
|
||||
crate::protocols::spec::ToolChoiceValue::None
|
||||
))
|
||||
);
|
||||
|
||||
let tool_parser_available = tool_choice_enabled
|
||||
&& chat_request.tools.is_some()
|
||||
&& utils::check_tool_parser_availability(
|
||||
&self.processor.tool_parser_factory,
|
||||
self.processor.configured_tool_parser.as_ref(),
|
||||
&chat_request.model,
|
||||
);
|
||||
|
||||
// Log once per request (not per choice)
|
||||
if chat_request.separate_reasoning && !reasoning_parser_available {
|
||||
debug!(
|
||||
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
||||
chat_request.model
|
||||
);
|
||||
}
|
||||
|
||||
if chat_request.tools.is_some() && tool_choice_enabled && !tool_parser_available {
|
||||
debug!(
|
||||
"No tool parser found for model '{}', skipping tool call parsing",
|
||||
chat_request.model
|
||||
);
|
||||
}
|
||||
|
||||
let stop_decoder = ctx
|
||||
.state
|
||||
.response
|
||||
@@ -878,6 +916,8 @@ impl ResponseProcessingStage {
|
||||
&chat_request,
|
||||
stop_decoder,
|
||||
history_tool_calls_count,
|
||||
reasoning_parser_available,
|
||||
tool_parser_available,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -30,8 +30,8 @@ pub struct ResponseProcessor {
|
||||
pub tokenizer: Arc<dyn Tokenizer>,
|
||||
pub tool_parser_factory: ToolParserFactory,
|
||||
pub reasoning_parser_factory: ReasoningParserFactory,
|
||||
configured_tool_parser: Option<String>,
|
||||
configured_reasoning_parser: Option<String>,
|
||||
pub configured_tool_parser: Option<String>,
|
||||
pub configured_reasoning_parser: Option<String>,
|
||||
}
|
||||
|
||||
impl ResponseProcessor {
|
||||
@@ -52,6 +52,7 @@ impl ResponseProcessor {
|
||||
}
|
||||
|
||||
/// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn process_single_choice(
|
||||
&self,
|
||||
complete: &proto::GenerateComplete,
|
||||
@@ -59,6 +60,8 @@ impl ResponseProcessor {
|
||||
original_request: &ChatCompletionRequest,
|
||||
stop_decoder: &mut StopSequenceDecoder,
|
||||
history_tool_calls_count: usize,
|
||||
reasoning_parser_available: bool,
|
||||
tool_parser_available: bool,
|
||||
) -> Result<ChatChoice, String> {
|
||||
stop_decoder.reset();
|
||||
// Decode tokens
|
||||
@@ -89,8 +92,8 @@ impl ResponseProcessor {
|
||||
let mut reasoning_text: Option<String> = None;
|
||||
let mut processed_text = final_text;
|
||||
|
||||
// Check if reasoning parsing is enabled and separate_reasoning is requested
|
||||
if original_request.separate_reasoning {
|
||||
// Check if reasoning parsing is enabled and parser is available
|
||||
if original_request.separate_reasoning && reasoning_parser_available {
|
||||
let pooled_parser = utils::get_reasoning_parser(
|
||||
&self.reasoning_parser_factory,
|
||||
self.configured_reasoning_parser.as_ref(),
|
||||
@@ -113,8 +116,6 @@ impl ResponseProcessor {
|
||||
|
||||
// Step 2: Handle tool call parsing
|
||||
let mut tool_calls: Option<Vec<ToolCall>> = None;
|
||||
|
||||
// Check if tool calls should be processed
|
||||
let tool_choice_enabled = !matches!(
|
||||
&original_request.tool_choice,
|
||||
Some(ToolChoice::Value(ToolChoiceValue::None))
|
||||
@@ -134,7 +135,7 @@ impl ResponseProcessor {
|
||||
&processed_text,
|
||||
&original_request.tool_choice,
|
||||
);
|
||||
} else {
|
||||
} else if tool_parser_available {
|
||||
(tool_calls, processed_text) = self
|
||||
.parse_tool_calls(
|
||||
&processed_text,
|
||||
|
||||
@@ -195,41 +195,29 @@ impl StreamingProcessor {
|
||||
let system_fingerprint = dispatch.weight_version.as_deref();
|
||||
|
||||
// Check parser availability once upfront (log a debug message only once per request)
|
||||
let reasoning_parser_available = if separate_reasoning {
|
||||
if let Some(parser_name) = self.configured_reasoning_parser.as_ref() {
|
||||
self.reasoning_parser_factory
|
||||
.registry()
|
||||
.has_parser(parser_name)
|
||||
} else {
|
||||
self.reasoning_parser_factory
|
||||
.registry()
|
||||
.has_parser_for_model(model)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
let reasoning_parser_available = separate_reasoning
|
||||
&& utils::check_reasoning_parser_availability(
|
||||
&self.reasoning_parser_factory,
|
||||
self.configured_reasoning_parser.as_ref(),
|
||||
model,
|
||||
);
|
||||
|
||||
let tool_parser_available = if tools.is_some() {
|
||||
if let Some(parser_name) = self.configured_tool_parser.as_ref() {
|
||||
self.tool_parser_factory.registry().has_parser(parser_name)
|
||||
} else {
|
||||
self.tool_parser_factory
|
||||
.registry()
|
||||
.has_parser_for_model(model)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
let tool_parser_available = tools.is_some()
|
||||
&& utils::check_tool_parser_availability(
|
||||
&self.tool_parser_factory,
|
||||
self.configured_tool_parser.as_ref(),
|
||||
model,
|
||||
);
|
||||
|
||||
if separate_reasoning && !reasoning_parser_available {
|
||||
warn!(
|
||||
debug!(
|
||||
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
||||
model
|
||||
);
|
||||
}
|
||||
|
||||
if tools.is_some() && !tool_parser_available {
|
||||
warn!(
|
||||
debug!(
|
||||
"No tool parser found for model '{}', skipping tool call parsing",
|
||||
model
|
||||
);
|
||||
|
||||
@@ -675,6 +675,34 @@ pub fn generate_tool_call_id(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a reasoning parser is available for the given model
|
||||
pub fn check_reasoning_parser_availability(
|
||||
reasoning_parser_factory: &crate::reasoning_parser::ParserFactory,
|
||||
configured_parser: Option<&String>,
|
||||
model: &str,
|
||||
) -> bool {
|
||||
if let Some(parser_name) = configured_parser {
|
||||
reasoning_parser_factory.registry().has_parser(parser_name)
|
||||
} else {
|
||||
reasoning_parser_factory
|
||||
.registry()
|
||||
.has_parser_for_model(model)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a tool parser is available for the given model
|
||||
pub fn check_tool_parser_availability(
|
||||
tool_parser_factory: &crate::tool_parser::ParserFactory,
|
||||
configured_parser: Option<&String>,
|
||||
model: &str,
|
||||
) -> bool {
|
||||
if let Some(parser_name) = configured_parser {
|
||||
tool_parser_factory.registry().has_parser(parser_name)
|
||||
} else {
|
||||
tool_parser_factory.registry().has_parser_for_model(model)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the appropriate reasoning parser for a model
|
||||
///
|
||||
/// If a parser name is explicitly configured, use that parser.
|
||||
|
||||
Reference in New Issue
Block a user