Support max_completion_tokens for OpenAIChatCompletions (#5857)
This commit is contained in:
@@ -1099,7 +1099,7 @@ def v1_chat_generate_request(
|
|||||||
|
|
||||||
sampling_params = {
|
sampling_params = {
|
||||||
"temperature": request.temperature,
|
"temperature": request.temperature,
|
||||||
"max_new_tokens": request.max_tokens,
|
"max_new_tokens": request.max_tokens or request.max_completion_tokens,
|
||||||
"min_new_tokens": request.min_tokens,
|
"min_new_tokens": request.min_tokens,
|
||||||
"stop": stop,
|
"stop": stop,
|
||||||
"stop_token_ids": request.stop_token_ids,
|
"stop_token_ids": request.stop_token_ids,
|
||||||
|
|||||||
@@ -320,7 +320,16 @@ class ChatCompletionRequest(BaseModel):
|
|||||||
logit_bias: Optional[Dict[str, float]] = None
|
logit_bias: Optional[Dict[str, float]] = None
|
||||||
logprobs: bool = False
|
logprobs: bool = False
|
||||||
top_logprobs: Optional[int] = None
|
top_logprobs: Optional[int] = None
|
||||||
max_tokens: Optional[int] = None
|
max_tokens: Optional[int] = Field(
|
||||||
|
default=None,
|
||||||
|
deprecated="max_tokens is deprecated in favor of the max_completion_tokens field",
|
||||||
|
description="The maximum number of tokens that can be generated in the chat completion. ",
|
||||||
|
)
|
||||||
|
max_completion_tokens: Optional[int] = Field(
|
||||||
|
default=None,
|
||||||
|
description="The maximum number of completion tokens for a chat completion request, "
|
||||||
|
"including visible output tokens and reasoning tokens. Input tokens are not included. ",
|
||||||
|
)
|
||||||
n: int = 1
|
n: int = 1
|
||||||
presence_penalty: float = 0.0
|
presence_penalty: float = 0.0
|
||||||
response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None
|
response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None
|
||||||
|
|||||||
Reference in New Issue
Block a user