[router] Add Configurable L0 and L1 Tokenizer Caching (#11688)

This commit is contained in:
Simo Lin
2025-10-18 18:33:53 -07:00
committed by GitHub
parent fda0cb2a30
commit a7ae61ed77
22 changed files with 2385 additions and 24 deletions

View File

@@ -198,6 +198,10 @@ struct Router {
model_path: Option<String>,
tokenizer_path: Option<String>,
chat_template: Option<String>,
tokenizer_cache_enable_l0: bool,
tokenizer_cache_l0_max_entries: usize,
tokenizer_cache_enable_l1: bool,
tokenizer_cache_l1_max_memory: usize,
reasoning_parser: Option<String>,
tool_call_parser: Option<String>,
backend: BackendType,
@@ -350,6 +354,12 @@ impl Router {
oracle,
reasoning_parser: self.reasoning_parser.clone(),
tool_call_parser: self.tool_call_parser.clone(),
tokenizer_cache: config::TokenizerCacheConfig {
enable_l0: self.tokenizer_cache_enable_l0,
l0_max_entries: self.tokenizer_cache_l0_max_entries,
enable_l1: self.tokenizer_cache_enable_l1,
l1_max_memory: self.tokenizer_cache_l1_max_memory,
},
})
}
}
@@ -415,6 +425,10 @@ impl Router {
model_path = None,
tokenizer_path = None,
chat_template = None,
tokenizer_cache_enable_l0 = false,
tokenizer_cache_l0_max_entries = 10000,
tokenizer_cache_enable_l1 = false,
tokenizer_cache_l1_max_memory = 52428800,
reasoning_parser = None,
tool_call_parser = None,
backend = BackendType::Sglang,
@@ -480,6 +494,10 @@ impl Router {
model_path: Option<String>,
tokenizer_path: Option<String>,
chat_template: Option<String>,
tokenizer_cache_enable_l0: bool,
tokenizer_cache_l0_max_entries: usize,
tokenizer_cache_enable_l1: bool,
tokenizer_cache_l1_max_memory: usize,
reasoning_parser: Option<String>,
tool_call_parser: Option<String>,
backend: BackendType,
@@ -559,6 +577,10 @@ impl Router {
model_path,
tokenizer_path,
chat_template,
tokenizer_cache_enable_l0,
tokenizer_cache_l0_max_entries,
tokenizer_cache_enable_l1,
tokenizer_cache_l1_max_memory,
reasoning_parser,
tool_call_parser,
backend,