[router] Add Configurable L0 and L1 Tokenizer Caching (#11688)
This commit is contained in:
@@ -5,7 +5,7 @@ use sglang_router_rs::{
|
||||
config::{
|
||||
CircuitBreakerConfig, ConfigError, ConfigResult, ConnectionMode, DiscoveryConfig,
|
||||
HealthCheckConfig, HistoryBackend, MetricsConfig, OracleConfig, PolicyConfig, RetryConfig,
|
||||
RouterConfig, RoutingMode,
|
||||
RouterConfig, RoutingMode, TokenizerCacheConfig,
|
||||
},
|
||||
metrics::PrometheusConfig,
|
||||
server::{self, ServerConfig},
|
||||
@@ -270,6 +270,18 @@ struct CliArgs {
|
||||
#[arg(long)]
|
||||
chat_template: Option<String>,
|
||||
|
||||
#[arg(long, default_value_t = false)]
|
||||
tokenizer_cache_enable_l0: bool,
|
||||
|
||||
#[arg(long, default_value_t = 10000)]
|
||||
tokenizer_cache_l0_max_entries: usize,
|
||||
|
||||
#[arg(long, default_value_t = false)]
|
||||
tokenizer_cache_enable_l1: bool,
|
||||
|
||||
#[arg(long, default_value_t = 52428800)]
|
||||
tokenizer_cache_l1_max_memory: usize,
|
||||
|
||||
#[arg(long, default_value = "memory", value_parser = ["memory", "none", "oracle"])]
|
||||
history_backend: String,
|
||||
|
||||
@@ -581,6 +593,12 @@ impl CliArgs {
|
||||
oracle,
|
||||
reasoning_parser: self.reasoning_parser.clone(),
|
||||
tool_call_parser: self.tool_call_parser.clone(),
|
||||
tokenizer_cache: TokenizerCacheConfig {
|
||||
enable_l0: self.tokenizer_cache_enable_l0,
|
||||
l0_max_entries: self.tokenizer_cache_l0_max_entries,
|
||||
enable_l1: self.tokenizer_cache_enable_l1,
|
||||
l1_max_memory: self.tokenizer_cache_l1_max_memory,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user