Fix openai protocols and pass top_k, min_p (#2499)
@@ -510,6 +510,8 @@ def v1_generate_request(
                 "stop": request.stop,
                 "stop_token_ids": request.stop_token_ids,
                 "top_p": request.top_p,
+                "top_k": request.top_k,
+                "min_p": request.min_p,
                 "presence_penalty": request.presence_penalty,
                 "frequency_penalty": request.frequency_penalty,
                 "repetition_penalty": request.repetition_penalty,
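The two added keys forward a request's top_k and min_p into the sampling parameters built for /v1/completions; before this fix they were silently dropped. A minimal sketch of exercising the fixed path through the OpenAI Python client — the base URL, port, and model name are placeholders for a locally running SGLang server, not part of this commit, and extra_body is the client's standard mechanism for vendor-specific fields:

import openai

# Placeholder endpoint and model name; assumes an SGLang server is
# already running locally.
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")

response = client.completions.create(
    model="default",
    prompt="The capital of France is",
    max_tokens=16,
    temperature=0.8,
    top_p=0.95,
    # SRT-only extras ride in extra_body; after this fix,
    # v1_generate_request copies them into the sampling params.
    extra_body={"top_k": 20, "min_p": 0.05},
)
print(response.choices[0].text)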
@@ -926,6 +928,8 @@ def v1_chat_generate_request(
         "stop": stop,
         "stop_token_ids": request.stop_token_ids,
         "top_p": request.top_p,
+        "top_k": request.top_k,
+        "min_p": request.min_p,
         "presence_penalty": request.presence_penalty,
         "frequency_penalty": request.frequency_penalty,
         "repetition_penalty": request.repetition_penalty,
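v1_chat_generate_request gets the same two keys, so /v1/chat/completions honors them as well. The same sketch for the chat endpoint, under the same assumptions about server address and model name:

import openai

client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Name one prime number."}],
    max_tokens=16,
    # Forwarded into the sampling params by v1_chat_generate_request
    # after this change.
    extra_body={"top_k": 20, "min_p": 0.05},
)
print(response.choices[0].message.content)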
@@ -166,17 +166,19 @@ class CompletionRequest(BaseModel):
     temperature: float = 1.0
     top_p: float = 1.0
     user: Optional[str] = None
-    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
 
     # Extra parameters for SRT backend only and will be ignored by OpenAI models.
-    json_schema: Optional[str] = None
-    regex: Optional[str] = None
+    top_k: int = -1
+    min_p: float = 0.0
     min_tokens: int = 0
+    regex: Optional[str] = None
+    json_schema: Optional[str] = None
     repetition_penalty: float = 1.0
     stop_token_ids: Optional[List[int]] = None
     no_stop_trim: bool = False
     ignore_eos: bool = False
     skip_special_tokens: bool = True
+    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
 
 
 class CompletionResponseChoice(BaseModel):
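CompletionRequest now declares the two new fields (and regroups the SRT-only extras). The defaults match the disabled state of each sampler knob: top_k=-1 considers all tokens and min_p=0.0 applies no minimum-probability cut. A small sketch of the resulting validation behavior, assuming this file's module path is sglang.srt.openai_api.protocol:

from sglang.srt.openai_api.protocol import CompletionRequest

# Omitted fields fall back to the "disabled" defaults declared above.
req = CompletionRequest(model="default", prompt="hello")
assert req.top_k == -1 and req.min_p == 0.0

# Explicit values are validated like any other pydantic field.
req = CompletionRequest(model="default", prompt="hello", top_k=20, min_p=0.05)
assert req.top_k == 20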
@@ -276,14 +278,16 @@ class ChatCompletionRequest(BaseModel):
     user: Optional[str] = None
 
     # Extra parameters for SRT backend only and will be ignored by OpenAI models.
-    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
-    regex: Optional[str] = None
+    top_k: int = -1
+    min_p: float = 0.0
     min_tokens: int = 0
+    regex: Optional[str] = None
     repetition_penalty: float = 1.0
     stop_token_ids: Optional[List[int]] = None
     no_stop_trim: bool = False
     ignore_eos: bool = False
     skip_special_tokens: bool = True
+    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
 
 
 class ChatMessage(BaseModel):
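ChatCompletionRequest picks up the same fields, so raw HTTP clients can place top_k and min_p directly in the JSON body with no extra_body wrapper. A sketch using requests; the URL and model name are again placeholders for a local server:

import requests

payload = {
    "model": "default",
    "messages": [{"role": "user", "content": "Say hi."}],
    # Parsed by ChatCompletionRequest and forwarded to the sampler.
    "top_k": 20,
    "min_p": 0.05,
}
r = requests.post("http://127.0.0.1:30000/v1/chat/completions", json=payload)
print(r.json()["choices"][0]["message"]["content"])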