[router] Add Rust CLI flags for queue size, timeout, and rate limit for token bucket rate limiter (#11483)
Co-authored-by: Simo Lin <linsimo.mark@gmail.com>
This commit is contained in:
@@ -195,6 +195,15 @@ struct CliArgs {
|
||||
#[arg(long, default_value_t = -1)]
|
||||
max_concurrent_requests: i32,
|
||||
|
||||
+ #[arg(long, default_value_t = 100)]
|
||||
+ queue_size: usize,
|
||||
|
||||
+ #[arg(long, default_value_t = 60)]
|
||||
+ queue_timeout_secs: u64,
|
||||
|
||||
+ #[arg(long)]
|
||||
+ rate_limit_tokens_per_second: Option<i32>,
|
||||
|
||||
#[arg(long, num_args = 0..)]
|
||||
cors_allowed_origins: Vec<String>,
|
||||
|
||||
@@ -535,8 +544,8 @@ impl CliArgs {
|
||||
Some(self.request_id_headers.clone())
|
||||
},
|
||||
max_concurrent_requests: self.max_concurrent_requests,
|
||||
- queue_size: 100,
|
||||
- queue_timeout_secs: 60,
|
||||
+ queue_size: self.queue_size,
|
||||
+ queue_timeout_secs: self.queue_timeout_secs,
|
||||
cors_allowed_origins: self.cors_allowed_origins.clone(),
|
||||
retry: RetryConfig {
|
||||
max_retries: self.retry_max_retries,
|
||||
@@ -561,7 +570,7 @@ impl CliArgs {
|
||||
endpoint: self.health_check_endpoint.clone(),
|
||||
},
|
||||
enable_igw: self.enable_igw,
|
||||
- rate_limit_tokens_per_second: None,
|
||||
+ rate_limit_tokens_per_second: self.rate_limit_tokens_per_second,
|
||||
model_path: self.model_path.clone(),
|
||||
tokenizer_path: self.tokenizer_path.clone(),
|
||||
chat_template: self.chat_template.clone(),
|
||||
|
||||
Reference in New Issue
Block a user