Add matched_stop (token id or str) to distinguish between eos and stop str in finish_reason generation (#1684)

This commit is contained in:
Gleb Drozdov
2024-10-17 22:06:52 +04:00
committed by GitHub
parent d17d19e5b8
commit a95d5589c3
3 changed files with 186 additions and 27 deletions

View File

@@ -621,16 +621,19 @@ def v1_generate_response(request, ret, tokenizer_manager, to_file=False):
else:
logprobs = None
finish_reason = ret_item["meta_info"]["finish_reason"]
if to_file:
# to make the choice data json serializable
choice_data = {
"index": 0,
"text": text,
"logprobs": logprobs,
"finish_reason": (
ret_item["meta_info"]["finish_reason"]["type"]
if ret_item["meta_info"]["finish_reason"]
else ""
"finish_reason": (finish_reason["type"] if finish_reason else ""),
"matched_stop": (
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
}
else:
@@ -638,10 +641,11 @@ def v1_generate_response(request, ret, tokenizer_manager, to_file=False):
index=idx,
text=text,
logprobs=logprobs,
finish_reason=(
ret_item["meta_info"]["finish_reason"]["type"]
if ret_item["meta_info"]["finish_reason"]
else ""
finish_reason=(finish_reason["type"] if finish_reason else ""),
matched_stop=(
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
)
@@ -771,14 +775,16 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
delta = text[len(stream_buffer) :]
stream_buffer = stream_buffer + delta
finish_reason = content["meta_info"]["finish_reason"]
choice_data = CompletionResponseStreamChoice(
index=index,
text=delta,
logprobs=logprobs,
finish_reason=(
content["meta_info"]["finish_reason"]["type"]
if content["meta_info"]["finish_reason"]
else ""
finish_reason=(finish_reason["type"] if finish_reason else ""),
matched_stop=(
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
)
chunk = CompletionStreamResponse(
@@ -1016,16 +1022,19 @@ def v1_chat_generate_response(request, ret, to_file=False, cache_report=False):
else:
choice_logprobs = None
finish_reason = ret_item["meta_info"]["finish_reason"]
if to_file:
# to make the choice data json serializable
choice_data = {
"index": 0,
"message": {"role": "assistant", "content": ret_item["text"]},
"logprobs": choice_logprobs,
"finish_reason": (
ret_item["meta_info"]["finish_reason"]["type"]
if ret_item["meta_info"]["finish_reason"]
else ""
"finish_reason": (finish_reason["type"] if finish_reason else ""),
"matched_stop": (
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
}
else:
@@ -1033,10 +1042,11 @@ def v1_chat_generate_response(request, ret, to_file=False, cache_report=False):
index=idx,
message=ChatMessage(role="assistant", content=ret_item["text"]),
logprobs=choice_logprobs,
finish_reason=(
ret_item["meta_info"]["finish_reason"]["type"]
if ret_item["meta_info"]["finish_reason"]
else ""
finish_reason=(finish_reason["type"] if finish_reason else ""),
matched_stop=(
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
)
@@ -1159,6 +1169,8 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
else:
choice_logprobs = None
finish_reason = content["meta_info"]["finish_reason"]
if is_first:
# First chunk with role
is_first = False
@@ -1166,9 +1178,12 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
index=index,
delta=DeltaMessage(role="assistant"),
finish_reason=(
content["meta_info"]["finish_reason"]["type"]
if content["meta_info"]["finish_reason"]
else ""
finish_reason["type"] if finish_reason else ""
),
matched_stop=(
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
logprobs=choice_logprobs,
)
@@ -1185,10 +1200,11 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=DeltaMessage(content=delta),
finish_reason=(
content["meta_info"]["finish_reason"]["type"]
if content["meta_info"]["finish_reason"]
else ""
finish_reason=(finish_reason["type"] if finish_reason else ""),
matched_stop=(
finish_reason["matched"]
if finish_reason and "matched" in finish_reason
else None
),
logprobs=choice_logprobs,
)

View File

@@ -184,6 +184,7 @@ class CompletionResponseChoice(BaseModel):
text: str
logprobs: Optional[LogProbs] = None
finish_reason: Optional[str] = None
matched_stop: Union[None, int, str] = None
class CompletionResponse(BaseModel):
@@ -200,6 +201,7 @@ class CompletionResponseStreamChoice(BaseModel):
text: str
logprobs: Optional[LogProbs] = None
finish_reason: Optional[str] = None
matched_stop: Union[None, int, str] = None
class CompletionStreamResponse(BaseModel):
@@ -291,6 +293,7 @@ class ChatCompletionResponseChoice(BaseModel):
message: ChatMessage
logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
finish_reason: str
matched_stop: Union[None, int, str] = None
class ChatCompletionResponse(BaseModel):
@@ -312,6 +315,7 @@ class ChatCompletionResponseStreamChoice(BaseModel):
delta: DeltaMessage
logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
finish_reason: Optional[str] = None
matched_stop: Union[None, int, str] = None
class ChatCompletionStreamResponse(BaseModel):