[router] Add Configurable L0 and L1 Tokenizer Caching (#11688)

This commit is contained in:
Simo Lin
2025-10-18 18:33:53 -07:00
committed by GitHub
parent fda0cb2a30
commit a7ae61ed77
22 changed files with 2385 additions and 24 deletions

View File

@@ -84,6 +84,7 @@ async fn test_non_streaming_mcp_minimal_e2e_with_persistence() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
// Create router and context
@@ -284,6 +285,7 @@ async fn test_conversations_crud_basic() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -619,6 +621,7 @@ async fn test_multi_turn_loop_with_mcp() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -795,6 +798,7 @@ async fn test_max_tool_calls_limit() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -937,6 +941,7 @@ async fn setup_streaming_mcp_test() -> (
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -1378,6 +1383,7 @@ async fn test_conversation_items_create_and_get() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -1479,6 +1485,7 @@ async fn test_conversation_items_delete() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -1586,6 +1593,7 @@ async fn test_conversation_items_max_limit() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -1663,6 +1671,7 @@ async fn test_conversation_items_unsupported_type() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);
@@ -1739,6 +1748,7 @@ async fn test_conversation_items_multi_conversation_sharing() {
oracle: None,
reasoning_parser: None,
tool_call_parser: None,
tokenizer_cache: sglang_router_rs::config::TokenizerCacheConfig::default(),
};
let ctx = common::create_test_context(router_cfg);