[router] Add Configurable L0 and L1 Tokenizer Caching (#11688)

This commit is contained in:
Simo Lin
2025-10-18 18:33:53 -07:00
committed by GitHub
parent fda0cb2a30
commit a7ae61ed77
22 changed files with 2385 additions and 24 deletions

View File

@@ -48,7 +48,11 @@ use crate::{
reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::{router_manager::RouterManager, RouterTrait},
service_discovery::{start_service_discovery, ServiceDiscoveryConfig},
tokenizer::{factory as tokenizer_factory, traits::Tokenizer},
tokenizer::{
cache::{CacheConfig, CachedTokenizer},
factory as tokenizer_factory,
traits::Tokenizer,
},
tool_parser::ParserFactory as ToolParserFactory,
};
@@ -864,7 +868,7 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
.to_string()
})?;
let tokenizer = Some(
let base_tokenizer =
tokenizer_factory::create_tokenizer_with_chat_template_blocking(
&tokenizer_path,
config.router_config.chat_template.as_deref(),
@@ -876,8 +880,23 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
or a HuggingFace model ID. For directories, ensure they contain tokenizer files.",
tokenizer_path, e
)
})?,
);
})?;
// Wrap the base tokenizer in a caching layer only when at least one cache level (L0 or L1) is enabled
let tokenizer = if config.router_config.tokenizer_cache.enable_l0
|| config.router_config.tokenizer_cache.enable_l1
{
let cache_config = CacheConfig {
enable_l0: config.router_config.tokenizer_cache.enable_l0,
l0_max_entries: config.router_config.tokenizer_cache.l0_max_entries,
enable_l1: config.router_config.tokenizer_cache.enable_l1,
l1_max_memory: config.router_config.tokenizer_cache.l1_max_memory,
};
Some(Arc::new(CachedTokenizer::new(base_tokenizer, cache_config)) as Arc<dyn Tokenizer>)
} else {
// Use base tokenizer directly without caching
Some(base_tokenizer)
};
let reasoning_parser_factory = Some(ReasoningParserFactory::new());
let tool_parser_factory = Some(ToolParserFactory::new());