feat: throttle requests at scheduler based on --max_queued_requests (#7565)
This commit is contained in:
@@ -38,7 +38,7 @@ import orjson
|
||||
import requests
|
||||
import uvicorn
|
||||
import uvloop
|
||||
from fastapi import Depends, FastAPI, Request, UploadFile
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, UploadFile
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import ORJSONResponse, Response, StreamingResponse
|
||||
@@ -174,6 +174,18 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
|
||||
@app.exception_handler(HTTPException)
async def validation_exception_handler(request: Request, exc: HTTPException):
    """Render an ``HTTPException`` as a structured JSON error response.

    The exception's ``detail`` becomes the error message, and its
    ``status_code`` is used both as the error ``type`` (stringified) and
    ``code``, as well as the HTTP status of the response.

    NOTE: the handler for ``RequestValidationError`` defined right after this
    one reuses the same function name. Registration happens through the
    decorator, so rebinding the module-level name later does not unregister
    this handler.

    Args:
        request: The incoming request (unused; required by the handler
            signature).
        exc: The raised HTTP exception.

    Returns:
        An ``ORJSONResponse`` carrying the serialized ``ErrorResponse``, the
        exception's status code, and any headers attached to the exception.
    """
    error = ErrorResponse(
        object="error",
        message=exc.detail,
        type=str(exc.status_code),
        code=exc.status_code,
    )
    return ORJSONResponse(
        content=error.model_dump(),
        status_code=exc.status_code,
        # Forward headers set on the exception (e.g. Retry-After for
        # throttled requests, WWW-Authenticate for 401s); without this a
        # custom handler silently drops them.
        headers=getattr(exc, "headers", None),
    )
|
||||
|
||||
|
||||
# Custom exception handlers to change validation error status codes
|
||||
@app.exception_handler(RequestValidationError)
|
||||
async def validation_exception_handler(request: Request, exc: RequestValidationError):
|
||||
|
||||
@@ -4,7 +4,7 @@ import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from fastapi import Request
|
||||
from fastapi import HTTPException, Request
|
||||
from fastapi.responses import ORJSONResponse, StreamingResponse
|
||||
|
||||
from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest
|
||||
@@ -45,7 +45,10 @@ class OpenAIServingBase(ABC):
|
||||
return await self._handle_non_streaming_request(
|
||||
adapted_request, processed_request, raw_request
|
||||
)
|
||||
|
||||
except HTTPException as e:
|
||||
return self.create_error_response(
|
||||
message=e.detail, err_type=str(e.status_code), status_code=e.status_code
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error in request: {e}")
|
||||
return self.create_error_response(
|
||||
|
||||
Reference in New Issue
Block a user