Expose no_stop_trim and skip_special_tokens in openai api (#2039)
This commit is contained in:
@@ -516,8 +516,9 @@ def v1_generate_request(
|
|||||||
"regex": request.regex,
|
"regex": request.regex,
|
||||||
"json_schema": request.json_schema,
|
"json_schema": request.json_schema,
|
||||||
"n": request.n,
|
"n": request.n,
|
||||||
"ignore_eos": request.ignore_eos,
|
|
||||||
"no_stop_trim": request.no_stop_trim,
|
"no_stop_trim": request.no_stop_trim,
|
||||||
|
"ignore_eos": request.ignore_eos,
|
||||||
|
"skip_special_tokens": request.skip_special_tokens,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return_logprobs.append(request.logprobs is not None and request.logprobs > 0)
|
return_logprobs.append(request.logprobs is not None and request.logprobs > 0)
|
||||||
@@ -928,7 +929,9 @@ def v1_chat_generate_request(
|
|||||||
"repetition_penalty": request.repetition_penalty,
|
"repetition_penalty": request.repetition_penalty,
|
||||||
"regex": request.regex,
|
"regex": request.regex,
|
||||||
"n": request.n,
|
"n": request.n,
|
||||||
|
"no_stop_trim": request.no_stop_trim,
|
||||||
"ignore_eos": request.ignore_eos,
|
"ignore_eos": request.ignore_eos,
|
||||||
|
"skip_special_tokens": request.skip_special_tokens,
|
||||||
}
|
}
|
||||||
if request.response_format and request.response_format.type == "json_schema":
|
if request.response_format and request.response_format.type == "json_schema":
|
||||||
sampling_params["json_schema"] = convert_json_schema_to_str(
|
sampling_params["json_schema"] = convert_json_schema_to_str(
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ class ModelList(BaseModel):
|
|||||||
"""Model list consists of model cards."""
|
"""Model list consists of model cards."""
|
||||||
|
|
||||||
object: str = "list"
|
object: str = "list"
|
||||||
data: List[ModelCard] = []
|
data: List[ModelCard] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
class ErrorResponse(BaseModel):
|
class ErrorResponse(BaseModel):
|
||||||
@@ -143,7 +143,7 @@ class BatchResponse(BaseModel):
|
|||||||
expired_at: Optional[int] = None
|
expired_at: Optional[int] = None
|
||||||
cancelling_at: Optional[int] = None
|
cancelling_at: Optional[int] = None
|
||||||
cancelled_at: Optional[int] = None
|
cancelled_at: Optional[int] = None
|
||||||
request_counts: dict = {"total": 0, "completed": 0, "failed": 0}
|
request_counts: Optional[dict] = None
|
||||||
metadata: Optional[dict] = None
|
metadata: Optional[dict] = None
|
||||||
|
|
||||||
|
|
||||||
@@ -153,30 +153,31 @@ class CompletionRequest(BaseModel):
|
|||||||
model: str
|
model: str
|
||||||
prompt: Union[List[int], List[List[int]], str, List[str]]
|
prompt: Union[List[int], List[List[int]], str, List[str]]
|
||||||
best_of: Optional[int] = None
|
best_of: Optional[int] = None
|
||||||
echo: Optional[bool] = False
|
echo: bool = False
|
||||||
frequency_penalty: Optional[float] = 0.0
|
frequency_penalty: float = 0.0
|
||||||
logit_bias: Optional[Dict[str, float]] = None
|
logit_bias: Optional[Dict[str, float]] = None
|
||||||
logprobs: Optional[int] = None
|
logprobs: Optional[int] = None
|
||||||
max_tokens: Optional[int] = 16
|
max_tokens: int = 16
|
||||||
n: int = 1
|
n: int = 1
|
||||||
presence_penalty: Optional[float] = 0.0
|
presence_penalty: float = 0.0
|
||||||
seed: Optional[int] = None
|
seed: Optional[int] = None
|
||||||
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
stop: Optional[Union[str, List[str]]] = None
|
||||||
stream: Optional[bool] = False
|
stream: bool = False
|
||||||
stream_options: Optional[StreamOptions] = None
|
stream_options: Optional[StreamOptions] = None
|
||||||
suffix: Optional[str] = None
|
suffix: Optional[str] = None
|
||||||
temperature: Optional[float] = 1.0
|
temperature: float = 1.0
|
||||||
top_p: Optional[float] = 1.0
|
top_p: float = 1.0
|
||||||
user: Optional[str] = None
|
user: Optional[str] = None
|
||||||
|
|
||||||
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
||||||
regex: Optional[str] = None
|
|
||||||
json_schema: Optional[str] = None
|
json_schema: Optional[str] = None
|
||||||
ignore_eos: bool = False
|
regex: Optional[str] = None
|
||||||
min_tokens: int = 0
|
min_tokens: int = 0
|
||||||
repetition_penalty: Optional[float] = 1.0
|
repetition_penalty: float = 1.0
|
||||||
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
|
stop_token_ids: Optional[List[int]] = None
|
||||||
no_stop_trim: Union[bool, List[bool]] = False
|
no_stop_trim: bool = False
|
||||||
|
ignore_eos: bool = False
|
||||||
|
skip_special_tokens: bool = True
|
||||||
|
|
||||||
|
|
||||||
class CompletionResponseChoice(BaseModel):
|
class CompletionResponseChoice(BaseModel):
|
||||||
@@ -259,28 +260,30 @@ class ChatCompletionRequest(BaseModel):
|
|||||||
# https://platform.openai.com/docs/api-reference/chat/create
|
# https://platform.openai.com/docs/api-reference/chat/create
|
||||||
messages: List[ChatCompletionMessageParam]
|
messages: List[ChatCompletionMessageParam]
|
||||||
model: str
|
model: str
|
||||||
frequency_penalty: Optional[float] = 0.0
|
frequency_penalty: float = 0.0
|
||||||
logit_bias: Optional[Dict[str, float]] = None
|
logit_bias: Optional[Dict[str, float]] = None
|
||||||
logprobs: Optional[bool] = False
|
logprobs: bool = False
|
||||||
top_logprobs: Optional[int] = None
|
top_logprobs: Optional[int] = None
|
||||||
max_tokens: Optional[int] = None
|
max_tokens: Optional[int] = None
|
||||||
n: Optional[int] = 1
|
n: int = 1
|
||||||
presence_penalty: Optional[float] = 0.0
|
presence_penalty: float = 0.0
|
||||||
response_format: Optional[ResponseFormat] = None
|
response_format: Optional[ResponseFormat] = None
|
||||||
seed: Optional[int] = None
|
seed: Optional[int] = None
|
||||||
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
stop: Optional[Union[str, List[str]]] = None
|
||||||
stream: Optional[bool] = False
|
stream: bool = False
|
||||||
stream_options: Optional[StreamOptions] = None
|
stream_options: Optional[StreamOptions] = None
|
||||||
temperature: Optional[float] = 0.7
|
temperature: float = 0.7
|
||||||
top_p: Optional[float] = 1.0
|
top_p: float = 1.0
|
||||||
user: Optional[str] = None
|
user: Optional[str] = None
|
||||||
|
|
||||||
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
||||||
regex: Optional[str] = None
|
regex: Optional[str] = None
|
||||||
min_tokens: Optional[int] = 0
|
min_tokens: int = 0
|
||||||
repetition_penalty: Optional[float] = 1.0
|
repetition_penalty: float = 1.0
|
||||||
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
|
stop_token_ids: Optional[List[int]] = None
|
||||||
|
no_stop_trim: bool = False
|
||||||
ignore_eos: bool = False
|
ignore_eos: bool = False
|
||||||
|
skip_special_tokens: bool = True
|
||||||
|
|
||||||
|
|
||||||
class ChatMessage(BaseModel):
|
class ChatMessage(BaseModel):
|
||||||
|
|||||||
@@ -34,13 +34,13 @@ class SamplingParams:
|
|||||||
frequency_penalty: float = 0.0,
|
frequency_penalty: float = 0.0,
|
||||||
presence_penalty: float = 0.0,
|
presence_penalty: float = 0.0,
|
||||||
repetition_penalty: float = 1.0,
|
repetition_penalty: float = 1.0,
|
||||||
ignore_eos: bool = False,
|
|
||||||
skip_special_tokens: bool = True,
|
|
||||||
spaces_between_special_tokens: bool = True,
|
spaces_between_special_tokens: bool = True,
|
||||||
regex: Optional[str] = None,
|
regex: Optional[str] = None,
|
||||||
n: int = 1,
|
n: int = 1,
|
||||||
json_schema: Optional[str] = None,
|
json_schema: Optional[str] = None,
|
||||||
no_stop_trim: bool = False,
|
no_stop_trim: bool = False,
|
||||||
|
ignore_eos: bool = False,
|
||||||
|
skip_special_tokens: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
self.top_p = top_p
|
self.top_p = top_p
|
||||||
|
|||||||
Reference in New Issue
Block a user