[Fix] fix eos trim inconsistency (#1650)
This commit is contained in:
@@ -493,23 +493,38 @@ def v1_generate_request(
|
||||
top_logprobs_nums.append(
|
||||
request.logprobs if request.logprobs is not None else 0
|
||||
)
|
||||
sampling_params_list.append(
|
||||
{
|
||||
"temperature": request.temperature,
|
||||
"max_new_tokens": request.max_tokens,
|
||||
"min_new_tokens": request.min_tokens,
|
||||
"stop": request.stop,
|
||||
"stop_token_ids": request.stop_token_ids,
|
||||
"top_p": request.top_p,
|
||||
"presence_penalty": request.presence_penalty,
|
||||
"frequency_penalty": request.frequency_penalty,
|
||||
"repetition_penalty": request.repetition_penalty,
|
||||
"regex": request.regex,
|
||||
"json_schema": request.json_schema,
|
||||
"n": request.n,
|
||||
"ignore_eos": request.ignore_eos,
|
||||
}
|
||||
)
|
||||
sampling_params = []
|
||||
if isinstance(request.no_eos_trim, list):
|
||||
num_reqs = len(request.prompt)
|
||||
else:
|
||||
num_reqs = 1
|
||||
for i in range(num_reqs):
|
||||
sampling_params.append(
|
||||
{
|
||||
"temperature": request.temperature,
|
||||
"max_new_tokens": request.max_tokens,
|
||||
"min_new_tokens": request.min_tokens,
|
||||
"stop": request.stop,
|
||||
"stop_token_ids": request.stop_token_ids,
|
||||
"top_p": request.top_p,
|
||||
"presence_penalty": request.presence_penalty,
|
||||
"frequency_penalty": request.frequency_penalty,
|
||||
"repetition_penalty": request.repetition_penalty,
|
||||
"regex": request.regex,
|
||||
"json_schema": request.json_schema,
|
||||
"n": request.n,
|
||||
"ignore_eos": request.ignore_eos,
|
||||
"no_eos_trim": (
|
||||
request.no_eos_trim
|
||||
if not isinstance(request.no_eos_trim, list)
|
||||
else request.no_eos_trim[i]
|
||||
),
|
||||
}
|
||||
)
|
||||
if num_reqs == 1:
|
||||
sampling_params_list.append(sampling_params[0])
|
||||
else:
|
||||
sampling_params_list.append(sampling_params)
|
||||
|
||||
if len(all_requests) == 1:
|
||||
prompt = prompts[0]
|
||||
|
||||
@@ -174,6 +174,7 @@ class CompletionRequest(BaseModel):
|
||||
min_tokens: int = 0
|
||||
repetition_penalty: Optional[float] = 1.0
|
||||
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
|
||||
no_eos_trim: Union[bool, List[bool]] = False
|
||||
|
||||
|
||||
class CompletionResponseChoice(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user