Update v1/responses to be more OpenAI-compatible. (#9624)

This commit is contained in:
Vincent Zhong
2025-10-05 14:47:46 -04:00
committed by GitHub
parent e0b2d3eebe
commit 36a6b8dbfc
4 changed files with 491 additions and 2 deletions

View File

@@ -299,7 +299,23 @@ app.add_middleware(
@app.exception_handler(HTTPException)
async def validation_exception_handler(request: Request, exc: HTTPException):
"""Enrich HTTP exception with status code and other details"""
"""Enrich HTTP exception with status code and other details.
For /v1/responses, emit OpenAI-style nested error envelope:
{"error": {"message": "...", "type": "...", "param": null, "code": <status>}}
"""
# Use the OpenAI-style nested error envelope for the Responses API.
if request.url.path.startswith("/v1/responses"):
nested_error = {
"message": exc.detail,
"type": HTTPStatus(exc.status_code).phrase,
"param": None,
"code": exc.status_code,
}
return ORJSONResponse(
content={"error": nested_error}, status_code=exc.status_code
)
error = ErrorResponse(
object="error",
message=exc.detail,
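
For reference, a minimal sketch of the two error shapes this handler now distinguishes; the field values are illustrative, and the legacy shape follows the ErrorResponse fields above:

# Illustrative only: the two error shapes returned by this handler.
# Legacy flat format (all endpoints except /v1/responses):
legacy = {
    "object": "error",
    "message": "The model `foo` does not exist.",
    "type": "Not Found",
    "code": 404,
}
# OpenAI-style nested envelope (/v1/responses only):
nested = {
    "error": {
        "message": "The model `foo` does not exist.",
        "type": "Not Found",
        "param": None,
        "code": 404,
    }
}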
@@ -312,7 +328,10 @@ async def validation_exception_handler(request: Request, exc: HTTPException):
# Custom exception handlers to change validation error status codes
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
"""Override FastAPI's default 422 validation error with 400"""
"""Override FastAPI's default 422 validation error with 400.
For /v1/responses, emit the OpenAI-style nested error envelope; other endpoints keep the legacy format.
"""
exc_str = str(exc)
errors_str = str(exc.errors())
@@ -321,6 +340,16 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
else:
message = exc_str
if request.url.path.startswith("/v1/responses"):
# Adapt for the /v1/responses API only (note the top-level "error" key differs from the legacy format).
nested_error = {
"message": message,
"type": HTTPStatus.BAD_REQUEST.phrase,
"param": None,
"code": HTTPStatus.BAD_REQUEST.value,
}
return ORJSONResponse(status_code=400, content={"error": nested_error})
err = ErrorResponse(
message=message,
type=HTTPStatus.BAD_REQUEST.phrase,

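A client-side sketch of consuming the new envelope; the base URL and the invalid payload below are assumptions for illustration only:

import requests  # any HTTP client works; requests is used here for brevity

resp = requests.post(
    "http://localhost:8000/v1/responses",  # hypothetical server address
    json={"model": "my-model", "input": 42},  # wrong type for `input` -> 400
)
if resp.status_code == 400:
    err = resp.json()["error"]  # nested, OpenAI-style
    print(err["message"], err["type"], err["code"])
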
View File

@@ -22,6 +22,8 @@ from openai.types.responses import (
ResponseFunctionToolCall,
ResponseInputItemParam,
ResponseOutputItem,
ResponseOutputMessage,
ResponseOutputText,
ResponseReasoningItem,
)
from openai.types.responses.response import ToolChoice
@@ -881,6 +883,26 @@ class ResponsesResponse(BaseModel):
tool_choice: str = "auto"
tools: List[ResponseTool] = Field(default_factory=list)
# OpenAI compatibility fields; not all are used at the moment.
# Recommend checking https://platform.openai.com/docs/api-reference/responses
error: Optional[dict] = None
incomplete_details: Optional[dict] = None # TODO(v): support this input
instructions: Optional[str] = None
max_output_tokens: Optional[int] = None
previous_response_id: Optional[str] = None
reasoning: Optional[dict] = (
# Unused; no model supports this yet. For GPT-oss, the system prompt
# sets this field, not server args.
None # {"effort": Optional[str], "summary": Optional[str]}
)
store: Optional[bool] = None
temperature: Optional[float] = None
text: Optional[dict] = None # e.g. {"format": {"type": "text"}}
top_p: Optional[float] = None
truncation: Optional[str] = None
user: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
@classmethod
def from_request(
cls,
@@ -895,6 +917,41 @@ class ResponsesResponse(BaseModel):
usage: Optional[UsageInfo],
) -> "ResponsesResponse":
"""Create a response from a request."""
# Determine whether the output is plain text only, to decide whether to set text.format.
def _is_text_only(
items: List[
Union[
ResponseOutputItem, ResponseReasoningItem, ResponseFunctionToolCall
]
]
) -> bool:
if not items:
return False
for it in items:
# Reasoning items and tool calls -> not pure text.
if isinstance(it, (ResponseReasoningItem, ResponseFunctionToolCall)):
return False
try:
if isinstance(it, ResponseOutputText):
continue
elif isinstance(it, ResponseOutputMessage):
if not it.content:
continue
for c in it.content:
if not isinstance(c, ResponseOutputText):
return False
else:
# Unknown type, not considered text-only
return False
except AttributeError:
return False
return True
text_format = {"format": {"type": "text"}} if _is_text_only(output) else None
return cls(
id=request.request_id,
created_at=created_time,
@@ -905,6 +962,23 @@ class ResponsesResponse(BaseModel):
parallel_tool_calls=request.parallel_tool_calls or True,
tool_choice=request.tool_choice,
tools=request.tools,
# Fields for parity with /v1/responses.
error=None,
incomplete_details=None,
instructions=request.instructions,
max_output_tokens=request.max_output_tokens,
previous_response_id=request.previous_response_id, # TODO(v): ensure this is propagated if retrieved from store
reasoning={
"effort": request.reasoning.effort if request.reasoning else None,
"summary": None, # unused
},
store=request.store,
temperature=request.temperature,
text=text_format, # TODO(v): Expand coverage per https://platform.openai.com/docs/api-reference/responses/list
top_p=request.top_p,
truncation=request.truncation,
user=request.user,
metadata=request.metadata or {},
)
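
To show how the unset compatibility fields surface on the wire, here is a stripped-down stand-in model; it is hypothetical, not the real ResponsesResponse, and assumes Pydantic v2:

from typing import Any, Dict, Optional

from pydantic import BaseModel


class MiniResponse(BaseModel):
    # Hypothetical stand-in carrying a few of the fields added above.
    error: Optional[dict] = None
    instructions: Optional[str] = None
    max_output_tokens: Optional[int] = None
    temperature: Optional[float] = None
    metadata: Optional[Dict[str, Any]] = None


# Unset fields serialize as explicit nulls, matching the OpenAI wire
# format, where clients expect the keys to be present:
print(MiniResponse(temperature=0.7).model_dump())
# {'error': None, 'instructions': None, 'max_output_tokens': None,
#  'temperature': 0.7, 'metadata': None}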

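The _is_text_only decision above can also be exercised in isolation with hypothetical stand-ins for the openai response types:

from dataclasses import dataclass, field
from typing import List


@dataclass
class FakeText:  # stands in for ResponseOutputText
    text: str


@dataclass
class FakeMessage:  # stands in for ResponseOutputMessage
    content: List[object] = field(default_factory=list)


@dataclass
class FakeToolCall:  # stands in for ResponseFunctionToolCall
    name: str = "get_weather"


def is_text_only(items: List[object]) -> bool:
    # Same decision shape as _is_text_only above, on the stand-in types.
    if not items:
        return False
    for it in items:
        if isinstance(it, FakeToolCall):
            return False
        if isinstance(it, FakeMessage):
            if any(not isinstance(c, FakeText) for c in it.content):
                return False
        elif not isinstance(it, FakeText):
            return False
    return True


assert is_text_only([FakeMessage(content=[FakeText("hi")])])
assert not is_text_only([FakeMessage(content=[FakeText("hi")]), FakeToolCall()])
assert not is_text_only([])  # empty output -> text.format stays None
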
View File

@@ -123,6 +123,39 @@ class OpenAIServingResponses(OpenAIServingChat):
self.background_tasks: dict[str, asyncio.Task] = {}
# Error helpers dedicated to /v1/responses.
def create_error_response(
self,
message: str,
err_type: str = "invalid_request_error",
status_code: int = 400,
param: Optional[str] = None,
) -> ORJSONResponse:
nested_error = {
"message": message,
"type": err_type,
"param": param,
"code": status_code,
}
return ORJSONResponse(content={"error": nested_error}, status_code=status_code)
def create_streaming_error_response(
self,
message: str,
err_type: str = "BadRequestError",
status_code: int = 400,
) -> str:
return json.dumps(
{
"error": {
"message": message,
"type": err_type,
"param": None,
"code": status_code,
}
}
)
def _request_id_prefix(self) -> str:
return "resp_"
@@ -834,6 +867,13 @@ class OpenAIServingResponses(OpenAIServingChat):
async for ctx in result_generator:
# Only process context objects that implement the `is_expecting_start()` method,
# which indicates they support per-turn streaming (e.g., StreamingHarmonyContext).
# Contexts without this method are skipped, as they do not represent a new turn
# or are not compatible with per-turn handling in the /v1/responses endpoint.
if not hasattr(ctx, "is_expecting_start"):
continue
if ctx.is_expecting_start():
current_output_index += 1
sent_output_item_added = False
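
A minimal sketch of the duck-typing contract this guard relies on, using a hypothetical context class:

class FakeTurnContext:
    # Hypothetical stand-in for StreamingHarmonyContext: exposes
    # is_expecting_start() so the loop treats it as per-turn capable.
    def __init__(self) -> None:
        self._mid_turn = False

    def is_expecting_start(self) -> bool:
        # True exactly when the next chunk opens a new turn.
        return not self._mid_turn


class OpaqueContext:
    # No is_expecting_start() -> skipped by the hasattr guard above.
    pass


for ctx in [FakeTurnContext(), OpaqueContext()]:
    if not hasattr(ctx, "is_expecting_start"):
        continue
    if ctx.is_expecting_start():
        print("new turn: bump output index, reset sent_output_item_added")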