Update v1/responses to be more OpenAI-compatible. (#9624)
@@ -299,7 +299,23 @@ app.add_middleware(
 @app.exception_handler(HTTPException)
 async def validation_exception_handler(request: Request, exc: HTTPException):
-    """Enrich HTTP exception with status code and other details"""
+    """Enrich HTTP exception with status code and other details.
+
+    For /v1/responses, emit the OpenAI-style nested error envelope:
+    {"error": {"message": "...", "type": "...", "param": null, "code": <status>}}
+    """
+    # Adjust the error format for the Responses API.
+    if request.url.path.startswith("/v1/responses"):
+        nested_error = {
+            "message": exc.detail,
+            "type": HTTPStatus(exc.status_code).phrase,
+            "param": None,
+            "code": exc.status_code,
+        }
+        return ORJSONResponse(
+            content={"error": nested_error}, status_code=exc.status_code
+        )
+
     error = ErrorResponse(
         object="error",
         message=exc.detail,
@@ -312,7 +328,10 @@ async def validation_exception_handler(request: Request, exc: HTTPException):
 # Custom exception handlers to change validation error status codes
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request: Request, exc: RequestValidationError):
-    """Override FastAPI's default 422 validation error with 400"""
+    """Override FastAPI's default 422 validation error with 400.
+
+    For /v1/responses, emit the OpenAI-style nested error envelope; other
+    endpoints keep the legacy format.
+    """
     exc_str = str(exc)
     errors_str = str(exc.errors())

@@ -321,6 +340,16 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
     else:
         message = exc_str

+    if request.url.path.startswith("/v1/responses"):
+        # Special-case /v1/responses: it uses the nested "error" envelope
+        # instead of the legacy flat format (note the different error key).
+        nested_error = {
+            "message": message,
+            "type": HTTPStatus.BAD_REQUEST.phrase,
+            "param": None,
+            "code": HTTPStatus.BAD_REQUEST.value,
+        }
+        return ORJSONResponse(status_code=400, content={"error": nested_error})
+
     err = ErrorResponse(
         message=message,
         type=HTTPStatus.BAD_REQUEST.phrase,
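To make the change concrete, a client-side sketch (not part of the commit; the local URL, the request bodies, and the use of the requests library are assumptions):

import requests

# /v1/responses now returns the OpenAI-style nested envelope, and
# validation failures are remapped from FastAPI's default 422 to 400.
r = requests.post("http://localhost:8000/v1/responses", json={"input": 42})
print(r.status_code)  # 400
print(r.json())
# -> {"error": {"message": "...", "type": "Bad Request", "param": None, "code": 400}}

# Other endpoints keep the legacy flat ErrorResponse format.
r = requests.post("http://localhost:8000/v1/chat/completions", json={"messages": 0})
print(r.json())
# -> {"object": "error", "message": "...", "type": "Bad Request", ...}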
@@ -22,6 +22,8 @@ from openai.types.responses import (
     ResponseFunctionToolCall,
     ResponseInputItemParam,
     ResponseOutputItem,
+    ResponseOutputMessage,
+    ResponseOutputText,
     ResponseReasoningItem,
 )
 from openai.types.responses.response import ToolChoice
@@ -881,6 +883,26 @@ class ResponsesResponse(BaseModel):
     tool_choice: str = "auto"
     tools: List[ResponseTool] = Field(default_factory=list)

+    # OpenAI compatibility fields; not all are used yet. See
+    # https://platform.openai.com/docs/api-reference/responses for details.
+    error: Optional[dict] = None
+    incomplete_details: Optional[dict] = None  # TODO(v) support this input
+    instructions: Optional[str] = None
+    max_output_tokens: Optional[int] = None
+    previous_response_id: Optional[str] = None
+    reasoning: Optional[dict] = (
+        # Unused: no model supports this. For GPT-oss, the system prompt,
+        # not server args, sets this field.
+        None  # {"effort": Optional[str], "summary": Optional[str]}
+    )
+    store: Optional[bool] = None
+    temperature: Optional[float] = None
+    text: Optional[dict] = None  # e.g. {"format": {"type": "text"}}
+    top_p: Optional[float] = None
+    truncation: Optional[str] = None
+    user: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+
     @classmethod
     def from_request(
         cls,
@@ -895,6 +917,41 @@ class ResponsesResponse(BaseModel):
         usage: Optional[UsageInfo],
     ) -> "ResponsesResponse":
         """Create a response from a request."""

+        # Determine whether the output is text-only, to set text.format accordingly.
+        def _is_text_only(
+            items: List[
+                Union[
+                    ResponseOutputItem, ResponseReasoningItem, ResponseFunctionToolCall
+                ]
+            ]
+        ) -> bool:
+            if not items:
+                return False
+            for it in items:
+                # Reasoning items and tool calls -> not pure text.
+                if isinstance(it, ResponseReasoningItem) or isinstance(
+                    it, ResponseFunctionToolCall
+                ):
+                    return False
+                try:
+                    if isinstance(it, ResponseOutputText):
+                        continue
+                    elif isinstance(it, ResponseOutputMessage):
+                        if not it.content:
+                            continue
+                        for c in it.content:
+                            if not isinstance(c, ResponseOutputText):
+                                return False
+                    else:
+                        # Unknown type: not considered text-only.
+                        return False
+                except AttributeError:
+                    return False
+            return True
+
+        text_format = {"format": {"type": "text"}} if _is_text_only(output) else None
+
         return cls(
             id=request.request_id,
             created_at=created_time,
@@ -905,6 +962,23 @@ class ResponsesResponse(BaseModel):
             parallel_tool_calls=request.parallel_tool_calls or True,
             tool_choice=request.tool_choice,
             tools=request.tools,
+            # Fields for parity with the OpenAI /v1/responses API.
+            error=None,
+            incomplete_details=None,
+            instructions=request.instructions,
+            max_output_tokens=request.max_output_tokens,
+            previous_response_id=request.previous_response_id,  # TODO(v): ensure this is propagated if retrieved from store
+            reasoning={
+                "effort": request.reasoning.effort if request.reasoning else None,
+                "summary": None,  # unused
+            },
+            store=request.store,
+            temperature=request.temperature,
+            text=text_format,  # TODO(v): expand coverage per https://platform.openai.com/docs/api-reference/responses/list
+            top_p=request.top_p,
+            truncation=request.truncation,
+            user=request.user,
+            metadata=request.metadata or {},
         )
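For reference, a minimal, self-contained sketch of the _is_text_only check above, using hypothetical stand-in classes instead of the real openai.types.responses models:

from dataclasses import dataclass, field
from typing import List

@dataclass
class FakeText:  # stands in for ResponseOutputText
    text: str

@dataclass
class FakeMessage:  # stands in for ResponseOutputMessage
    content: List[FakeText] = field(default_factory=list)

@dataclass
class FakeToolCall:  # stands in for ResponseFunctionToolCall
    name: str

def is_text_only(items) -> bool:
    if not items:
        return False  # empty output never counts as text-only
    for it in items:
        if isinstance(it, FakeToolCall):
            return False  # tool calls -> not pure text
        if isinstance(it, FakeMessage):
            if any(not isinstance(c, FakeText) for c in it.content):
                return False
        elif not isinstance(it, FakeText):
            return False  # unknown item type
    return True

assert is_text_only([FakeMessage(content=[FakeText("hi")])])
assert not is_text_only([FakeToolCall("get_weather")])
assert not is_text_only([])  # no output -> text.format stays None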
@@ -123,6 +123,39 @@ class OpenAIServingResponses(OpenAIServingChat):

         self.background_tasks: dict[str, asyncio.Task] = {}

+    # Error helpers dedicated to /v1/responses.
+    def create_error_response(
+        self,
+        message: str,
+        err_type: str = "invalid_request_error",
+        status_code: int = 400,
+        param: Optional[str] = None,
+    ) -> ORJSONResponse:
+        nested_error = {
+            "message": message,
+            "type": err_type,
+            "param": param,
+            "code": status_code,
+        }
+        return ORJSONResponse(content={"error": nested_error}, status_code=status_code)
+
+    def create_streaming_error_response(
+        self,
+        message: str,
+        err_type: str = "BadRequestError",
+        status_code: int = 400,
+    ) -> str:
+        return json.dumps(
+            {
+                "error": {
+                    "message": message,
+                    "type": err_type,
+                    "param": None,
+                    "code": status_code,
+                }
+            }
+        )
+
     def _request_id_prefix(self) -> str:
         return "resp_"
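A usage sketch for the two helpers (assuming `serving` is an OpenAIServingResponses instance; the messages are illustrative):

# Non-streaming: an ORJSONResponse carrying the nested envelope.
resp = serving.create_error_response(
    "previous_response_id not found",
    err_type="invalid_request_error",
    status_code=404,
)
# resp.status_code == 404, body:
# {"error": {"message": "...", "type": "invalid_request_error", "param": None, "code": 404}}

# Streaming: a JSON string, suitable for embedding in an SSE data: line.
chunk = serving.create_streaming_error_response("engine overloaded", status_code=503)
sse_line = f"data: {chunk}\n\n"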
@@ -834,6 +867,13 @@ class OpenAIServingResponses(OpenAIServingChat):

         async for ctx in result_generator:

+            # Only process context objects that implement is_expecting_start(),
+            # which indicates they support per-turn streaming (e.g.,
+            # StreamingHarmonyContext). Contexts without this method are
+            # skipped: they do not represent a new turn or are not compatible
+            # with per-turn handling in the /v1/responses endpoint.
+            if not hasattr(ctx, "is_expecting_start"):
+                continue
+
+            if ctx.is_expecting_start():
+                current_output_index += 1
+                sent_output_item_added = False
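A minimal sketch of the duck-typed capability check above, with hypothetical stand-in contexts:

class PlainContext:
    pass  # no is_expecting_start() -> skipped by the loop

class HarmonyLikeContext:
    def is_expecting_start(self) -> bool:
        return True  # signals that a new turn is starting

current_output_index = -1
for ctx in (PlainContext(), HarmonyLikeContext()):
    if not hasattr(ctx, "is_expecting_start"):
        continue
    if ctx.is_expecting_start():
        current_output_index += 1
        sent_output_item_added = False
print(current_output_index)  # 0: only the Harmony-like context advanced a turn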