Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -328,8 +328,9 @@ class ResponsesRequest(OpenAIBaseModel):
|
||||
# Also check text.format for OpenAI-style json_schema
|
||||
if self.text is not None and self.text.format is not None:
|
||||
if structured_outputs is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both structured_outputs and text.format"
|
||||
raise VLLMValidationError(
|
||||
"Cannot specify both structured_outputs and text.format",
|
||||
parameter="structured_outputs",
|
||||
)
|
||||
response_format = self.text.format
|
||||
if (
|
||||
@@ -378,14 +379,19 @@ class ResponsesRequest(OpenAIBaseModel):
|
||||
)
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validate_background(cls, data):
|
||||
if not data.get("background"):
|
||||
return data
|
||||
if not data.get("store", True):
|
||||
raise ValueError("background can only be used when `store` is true")
|
||||
raise VLLMValidationError(
|
||||
"background can only be used when `store` is true",
|
||||
parameter="background",
|
||||
)
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validate_prompt(cls, data):
|
||||
if data.get("prompt") is not None:
|
||||
raise VLLMValidationError(
|
||||
@@ -394,16 +400,19 @@ class ResponsesRequest(OpenAIBaseModel):
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_cache_salt_support(cls, data):
|
||||
if data.get("cache_salt") is not None and (
|
||||
not isinstance(data["cache_salt"], str) or not data["cache_salt"]
|
||||
):
|
||||
raise ValueError(
|
||||
"Parameter 'cache_salt' must be a non-empty string if provided."
|
||||
raise VLLMValidationError(
|
||||
"Parameter 'cache_salt' must be a non-empty string if provided.",
|
||||
parameter="cache_salt",
|
||||
)
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def function_call_parsing(cls, data):
|
||||
"""Parse function_call dictionaries into ResponseFunctionToolCall objects.
|
||||
This ensures Pydantic can properly resolve union types in the input field.
|
||||
|
||||
@@ -85,6 +85,8 @@ from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseCreatedEvent,
|
||||
ResponseInProgressEvent,
|
||||
ResponseInputOutputMessage,
|
||||
ResponseReasoningPartAddedEvent,
|
||||
ResponseReasoningPartDoneEvent,
|
||||
ResponsesRequest,
|
||||
ResponsesResponse,
|
||||
ResponseUsage,
|
||||
@@ -1339,6 +1341,19 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
),
|
||||
)
|
||||
)
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseReasoningPartAddedEvent(
|
||||
type="response.reasoning_part.added",
|
||||
sequence_number=-1,
|
||||
output_index=current_output_index,
|
||||
item_id=current_item_id,
|
||||
content_index=current_content_index,
|
||||
part=ResponseReasoningTextContent(
|
||||
text="",
|
||||
type="reasoning_text",
|
||||
),
|
||||
)
|
||||
)
|
||||
else:
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseOutputItemAddedEvent(
|
||||
@@ -1354,22 +1369,21 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
),
|
||||
)
|
||||
)
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseContentPartAddedEvent(
|
||||
type="response.content_part.added",
|
||||
sequence_number=-1,
|
||||
output_index=current_output_index,
|
||||
item_id=current_item_id,
|
||||
content_index=current_content_index,
|
||||
part=ResponseOutputText(
|
||||
type="output_text",
|
||||
text="",
|
||||
annotations=[],
|
||||
logprobs=[],
|
||||
),
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseContentPartAddedEvent(
|
||||
type="response.content_part.added",
|
||||
sequence_number=-1,
|
||||
output_index=current_output_index,
|
||||
item_id=current_item_id,
|
||||
content_index=current_content_index,
|
||||
part=ResponseOutputText(
|
||||
type="output_text",
|
||||
text="",
|
||||
annotations=[],
|
||||
logprobs=[],
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
current_content_index += 1
|
||||
first_delta_sent = True
|
||||
# todo(kebe7jun) tool call support
|
||||
|
||||
@@ -1397,6 +1411,19 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
text=reason_content,
|
||||
)
|
||||
)
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseReasoningPartDoneEvent(
|
||||
type="response.reasoning_part.done",
|
||||
sequence_number=-1,
|
||||
item_id=current_item_id,
|
||||
output_index=current_output_index,
|
||||
content_index=current_content_index,
|
||||
part=ResponseReasoningTextContent(
|
||||
text=reason_content,
|
||||
type="reasoning_text",
|
||||
),
|
||||
)
|
||||
)
|
||||
current_content_index = 0
|
||||
reasoning_item = ResponseReasoningItem(
|
||||
type="reasoning",
|
||||
@@ -1418,6 +1445,8 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
item=reasoning_item,
|
||||
)
|
||||
)
|
||||
current_output_index += 1
|
||||
current_item_id = str(uuid.uuid4())
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseOutputItemAddedEvent(
|
||||
type="response.output_item.added",
|
||||
@@ -1432,8 +1461,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
),
|
||||
)
|
||||
)
|
||||
current_output_index += 1
|
||||
current_item_id = str(uuid.uuid4())
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseContentPartAddedEvent(
|
||||
type="response.content_part.added",
|
||||
@@ -1449,7 +1476,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
),
|
||||
)
|
||||
)
|
||||
current_content_index += 1
|
||||
# reset previous delta messages
|
||||
previous_delta_messages = []
|
||||
|
||||
@@ -1485,7 +1511,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
),
|
||||
)
|
||||
)
|
||||
current_content_index += 1
|
||||
|
||||
previous_delta_messages.append(delta_message)
|
||||
if previous_delta_messages:
|
||||
@@ -1505,7 +1530,19 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
text=reason_content,
|
||||
)
|
||||
)
|
||||
current_content_index += 1
|
||||
yield _increment_sequence_number_and_return(
|
||||
ResponseReasoningPartDoneEvent(
|
||||
type="response.reasoning_part.done",
|
||||
sequence_number=-1,
|
||||
item_id=current_item_id,
|
||||
output_index=current_output_index,
|
||||
content_index=current_content_index,
|
||||
part=ResponseReasoningTextContent(
|
||||
text=reason_content,
|
||||
type="reasoning_text",
|
||||
),
|
||||
)
|
||||
)
|
||||
reasoning_item = ResponseReasoningItem(
|
||||
type="reasoning",
|
||||
content=[
|
||||
@@ -1543,7 +1580,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
item_id=current_item_id,
|
||||
)
|
||||
)
|
||||
current_content_index += 1
|
||||
part = ResponseOutputText(
|
||||
text=final_content,
|
||||
type="output_text",
|
||||
@@ -1559,7 +1595,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
part=part,
|
||||
)
|
||||
)
|
||||
current_content_index += 1
|
||||
item = ResponseOutputMessage(
|
||||
type="message",
|
||||
role="assistant",
|
||||
|
||||
Reference in New Issue
Block a user