[router] Add Configurable L0 and L1 Tokenizer Caching (#11688)

This commit is contained in:
Simo Lin
2025-10-18 18:33:53 -07:00
committed by GitHub
parent fda0cb2a30
commit a7ae61ed77
22 changed files with 2385 additions and 24 deletions

View File

@@ -5,7 +5,7 @@ use sglang_router_rs::{
config::{
CircuitBreakerConfig, ConfigError, ConfigResult, ConnectionMode, DiscoveryConfig,
HealthCheckConfig, HistoryBackend, MetricsConfig, OracleConfig, PolicyConfig, RetryConfig,
RouterConfig, RoutingMode,
RouterConfig, RoutingMode, TokenizerCacheConfig,
},
metrics::PrometheusConfig,
server::{self, ServerConfig},
@@ -270,6 +270,18 @@ struct CliArgs {
#[arg(long)]
chat_template: Option<String>,
// Tokenizer cache flags. Plain `//` comments are used on purpose: clap's derive
// would turn `///` doc comments into --help text.

// Switch for the L0 tokenizer cache; defaults to false.
#[arg(long, default_value_t = false)]
tokenizer_cache_enable_l0: bool,
// Entry-count limit for the L0 cache; defaults to 10000.
#[arg(long, default_value_t = 10000)]
tokenizer_cache_l0_max_entries: usize,
// Switch for the L1 tokenizer cache; defaults to false.
#[arg(long, default_value_t = false)]
tokenizer_cache_enable_l1: bool,
// Memory limit for the L1 cache; the default 52428800 equals 50 * 1024 * 1024.
// NOTE(review): presumably bytes (i.e. 50 MiB) - confirm against TokenizerCacheConfig.
#[arg(long, default_value_t = 52428800)]
tokenizer_cache_l1_max_memory: usize,
#[arg(long, default_value = "memory", value_parser = ["memory", "none", "oracle"])]
history_backend: String,
@@ -581,6 +593,12 @@ impl CliArgs {
oracle,
reasoning_parser: self.reasoning_parser.clone(),
tool_call_parser: self.tool_call_parser.clone(),
tokenizer_cache: TokenizerCacheConfig {
enable_l0: self.tokenizer_cache_enable_l0,
l0_max_entries: self.tokenizer_cache_l0_max_entries,
enable_l1: self.tokenizer_cache_enable_l1,
l1_max_memory: self.tokenizer_cache_l1_max_memory,
},
})
}