[router] disable rate limiter by default (#11435)

This commit is contained in:
Simo Lin
2025-10-10 20:43:07 -04:00
committed by GitHub
parent b36afed4a7
commit 2eeb27515a
7 changed files with 69 additions and 40 deletions

View File

@@ -424,22 +424,23 @@ pub struct ConcurrencyLimiter {
// NOTE(review): this hunk is rendered without +/- markers, so lines from before
// and after the change are interleaved and the text does not compile as shown.
// The comments below mark which lines appear to belong to which side.
impl ConcurrencyLimiter {
/// Create a new concurrency limiter with an optional request queue.
///
/// Returns the limiter together with an optional `QueueProcessor`.
/// In the `match`-based body, a queue sender and a processor are produced only
/// when a token bucket is supplied and `queue_size > 0`; in every other case
/// `queue_tx` is `None` and no processor is returned.
pub fn new(
// Older signature: the token bucket is mandatory.
token_bucket: Arc<TokenBucket>,
// Newer signature: the token bucket is optional.
// NOTE(review): `None` presumably means rate limiting is disabled — confirm against callers.
token_bucket: Option<Arc<TokenBucket>>,
queue_size: usize,
queue_timeout: Duration,
) -> (Self, Option<QueueProcessor>) {
// --- Older body: decides on `queue_size` alone. ---
if queue_size > 0 {
let (queue_tx, queue_rx) = mpsc::channel(queue_size);
let processor = QueueProcessor::new(token_bucket, queue_rx, queue_timeout);
(
Self {
queue_tx: Some(queue_tx),
},
Some(processor),
)
} else {
(Self { queue_tx: None }, None)
// --- Newer body: decides on both the bucket's presence and `queue_size`. ---
match (token_bucket, queue_size) {
// No token bucket: no queue and no processor, whatever the queue size.
(None, _) => (Self { queue_tx: None }, None),
// Bucket present and a non-zero queue size: create a channel with
// `mpsc::channel(size)` and hand its receiving end to the processor.
(Some(bucket), size) if size > 0 => {
let (queue_tx, queue_rx) = mpsc::channel(size);
let processor = QueueProcessor::new(bucket, queue_rx, queue_timeout);
(
Self {
queue_tx: Some(queue_tx),
},
Some(processor),
)
}
// Bucket present but the guard above failed (queue size is zero): no queue.
(Some(_), _) => (Self { queue_tx: None }, None),
}
}
}
@@ -450,12 +451,19 @@ pub async fn concurrency_limit_middleware(
request: Request<Body>,
next: Next,
) -> Response {
let token_bucket = match &app_state.context.rate_limiter {
Some(bucket) => bucket.clone(),
None => {
// Rate limiting disabled, pass through immediately
return next.run(request).await;
}
};
// Static counter for embeddings queue size
static EMBEDDINGS_QUEUE_SIZE: AtomicU64 = AtomicU64::new(0);
// Identify if this is an embeddings request based on path
let is_embeddings = request.uri().path().contains("/v1/embeddings");
let token_bucket = app_state.context.rate_limiter.clone();
// Try to acquire token immediately
if token_bucket.try_acquire(1.0).await.is_ok() {