[router][grpc] Support tool call parser in streaming (#11160)

This commit is contained in:
Chang Su
2025-10-02 03:18:50 -07:00
committed by GitHub
parent 5e786cca3a
commit b658be6f6a
38 changed files with 3086 additions and 2245 deletions

View File

@@ -8,9 +8,9 @@
//! - Different model formats (JSON, Mistral, Qwen, Pythonic, etc.)
use criterion::{black_box, criterion_group, BenchmarkId, Criterion, Throughput};
use sglang_router_rs::tool_parser::{
registry::ParserRegistry, state::ParseState, types::StreamResult,
};
use serde_json::json;
use sglang_router_rs::protocols::spec::{Function, Tool};
use sglang_router_rs::tool_parser::{JsonParser, ToolParser, ToolParserFactory};
use std::collections::BTreeMap;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
@@ -108,6 +108,40 @@ const STEP3_FORMAT: &str = r#"<step.tML version="0.1">
const GPT_OSS_FORMAT: &str = r#"<Channel.vector_search>{"collection": "technical_documentation", "query_embedding": [0.0234, -0.1456, 0.0891, 0.2341, -0.0567, 0.1234, 0.0456, -0.0789, 0.1567, 0.0234, -0.1123, 0.0678, 0.2345, -0.0456, 0.0891, 0.1234, -0.0567, 0.0789, 0.1456, -0.0234, 0.0891, 0.1567, -0.0678, 0.0345, 0.1234, -0.0456, 0.0789, 0.1891, -0.0234, 0.0567, 0.1345, -0.0891], "top_k": 10, "similarity_metric": "cosine", "filters": {"language": "en", "last_updated": {"$gte": "2023-01-01"}, "categories": {"$in": ["api", "sdk", "integration"]}}, "include_metadata": true, "rerank_with_cross_encoder": true}</Channel.vector_search>"#;
// Create test tools for parsers that need them
/// Builds the fixed set of tool definitions passed to parsers in the benchmarks.
///
/// Returns two `"function"`-typed tools, in this order:
/// 1. `search` — object schema with a string `query` and a numeric `limit`.
/// 2. `code_interpreter` — object schema with string `language` and `code`.
fn create_test_tools() -> Vec<Tool> {
    // (name, description, JSON-schema parameters) for each tool, in output order.
    let specs = vec![
        (
            "search",
            "Search for information",
            json!({
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "limit": {"type": "number"}
                }
            }),
        ),
        (
            "code_interpreter",
            "Execute code",
            json!({
                "type": "object",
                "properties": {
                    "language": {"type": "string"},
                    "code": {"type": "string"}
                }
            }),
        ),
    ];

    specs
        .into_iter()
        .map(|(name, description, parameters)| Tool {
            tool_type: "function".to_string(),
            function: Function {
                name: name.to_string(),
                description: Some(description.to_string()),
                parameters,
            },
        })
        .collect()
}
// Large test data for stress testing
fn generate_large_json(num_tools: usize) -> String {
let mut tools = Vec::new();
@@ -141,7 +175,7 @@ fn bench_registry_creation(c: &mut Criterion) {
b.iter_custom(|iters| {
let start = Instant::now();
for _ in 0..iters {
let registry = black_box(ParserRegistry::new());
let registry = black_box(ToolParserFactory::new());
// Force evaluation to prevent optimization
black_box(registry.list_parsers());
}
@@ -168,7 +202,7 @@ fn bench_registry_creation(c: &mut Criterion) {
}
fn bench_parser_lookup(c: &mut Criterion) {
let registry = Arc::new(ParserRegistry::new());
let registry = Arc::new(ToolParserFactory::new());
let models = vec![
"gpt-4",
"mistral-large",
@@ -227,7 +261,7 @@ fn bench_parser_lookup(c: &mut Criterion) {
fn bench_complete_parsing(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let registry = Arc::new(ParserRegistry::new());
let registry = Arc::new(ToolParserFactory::new());
let test_cases = vec![
("json_simple", "json", JSON_SIMPLE),
@@ -295,7 +329,6 @@ fn bench_complete_parsing(c: &mut Criterion) {
fn bench_streaming_parsing(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let registry = Arc::new(ParserRegistry::new());
// Streaming test with chunked input
let chunks = vec![
@@ -315,24 +348,21 @@ fn bench_streaming_parsing(c: &mut Criterion) {
let printed = Arc::new(AtomicBool::new(false));
group.bench_function("json_streaming", |b| {
let printed_clone = printed.clone();
let registry = registry.clone();
let rt = rt.handle().clone();
b.iter_custom(|iters| {
let parser = registry.get_parser("json").expect("Parser not found");
let tools = create_test_tools();
let start = Instant::now();
for _ in 0..iters {
let parser = parser.clone();
let mut state = ParseState::new();
let mut parser = JsonParser::new();
let mut complete_tools = Vec::new();
rt.block_on(async {
for chunk in &chunks {
if let StreamResult::ToolComplete(tool) =
parser.parse_incremental(chunk, &mut state).await.unwrap()
{
complete_tools.push(tool);
let result = parser.parse_incremental(chunk, &tools).await.unwrap();
if !result.calls.is_empty() {
complete_tools.extend(result.calls);
}
}
});
@@ -368,7 +398,7 @@ fn bench_streaming_parsing(c: &mut Criterion) {
fn bench_concurrent_parsing(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let registry = Arc::new(ParserRegistry::new());
let registry = Arc::new(ToolParserFactory::new());
let parser = registry.get_parser("json").expect("Parser not found");
let thread_counts = vec![1, 2, 4, 8, 16, 32];
@@ -456,7 +486,7 @@ fn bench_concurrent_parsing(c: &mut Criterion) {
fn bench_large_payloads(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let registry = Arc::new(ParserRegistry::new());
let registry = Arc::new(ToolParserFactory::new());
let parser = registry.get_parser("json").expect("Parser not found");
let sizes = vec![1, 10, 50, 100, 500];
@@ -526,7 +556,7 @@ fn bench_parser_reuse(c: &mut Criterion) {
b.iter_custom(|iters| {
let start = Instant::now();
for _ in 0..iters {
let registry = ParserRegistry::new();
let registry = ToolParserFactory::new();
let parser = registry.get_parser("json").unwrap();
let result = rt.block_on(async { parser.parse_complete(JSON_SIMPLE).await });
black_box(result.unwrap());
@@ -552,7 +582,7 @@ fn bench_parser_reuse(c: &mut Criterion) {
// Benchmark reusing registry
let printed_reuse = Arc::new(AtomicBool::new(false));
let shared_registry = Arc::new(ParserRegistry::new());
let shared_registry = Arc::new(ToolParserFactory::new());
group.bench_function("reuse_registry", |b| {
let printed_clone = printed_reuse.clone();
@@ -627,7 +657,7 @@ fn bench_parser_reuse(c: &mut Criterion) {
fn bench_latency_distribution(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let registry = Arc::new(ParserRegistry::new());
let registry = Arc::new(ToolParserFactory::new());
let test_cases = vec![
("json", JSON_SIMPLE),