Reasoning parser (#4000)

Co-authored-by: Lucas Pickup <lupickup@microsoft.com>
This commit is contained in:
Xihuai Wang
2025-03-04 13:16:36 +08:00
committed by GitHub
parent 11eea69e70
commit 95575aa76a
12 changed files with 1047 additions and 7 deletions

View File

@@ -72,6 +72,7 @@ from sglang.srt.openai_api.protocol import (
TopLogprob,
UsageInfo,
)
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.utils import get_exception_traceback
logger = logging.getLogger(__name__)
@@ -1038,7 +1039,12 @@ def v1_chat_generate_request(
def v1_chat_generate_response(
request, ret, to_file=False, cache_report=False, tool_call_parser=None
request,
ret,
to_file=False,
cache_report=False,
tool_call_parser=None,
reasoning_parser=None,
):
choices = []
@@ -1092,9 +1098,26 @@ def v1_chat_generate_response(
if isinstance(request, list):
tool_choice = request[idx].tool_choice
tools = request[idx].tools
separate_reasoning = request[idx].separate_reasoning
else:
tool_choice = request.tool_choice
tools = request.tools
separate_reasoning = request.separate_reasoning
if reasoning_parser and separate_reasoning:
try:
parser = ReasoningParser(
model_type=reasoning_parser, stream_reasoning=False
)
reasoning_text, text = parser.parse_non_stream(text)
except Exception as e:
logger.error(f"Exception: {e}")
return create_error_response(
HTTPStatus.BAD_REQUEST,
"Failed to parse reasoning related info to json format!",
)
else:
reasoning_text = None
if tool_choice != "none" and any([i in text for i in TOOLS_TAG_LIST]):
if finish_reason == "stop":
@@ -1124,8 +1147,9 @@ def v1_chat_generate_response(
"index": 0,
"message": {
"role": "assistant",
"content": ret_item["text"] if tool_calls is None else None,
"content": text if tool_calls is None else None,
"tool_calls": tool_calls,
"reasoning_content": reasoning_text,
},
"logprobs": choice_logprobs,
"finish_reason": (finish_reason["type"] if finish_reason else ""),
@@ -1140,8 +1164,9 @@ def v1_chat_generate_response(
index=idx,
message=ChatMessage(
role="assistant",
content=ret_item["text"] if tool_calls is None else None,
content=text if tool_calls is None else None,
tool_calls=tool_calls,
reasoning_content=reasoning_text,
),
logprobs=choice_logprobs,
finish_reason=(finish_reason["type"] if finish_reason else ""),
@@ -1208,6 +1233,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
if adapted_request.stream:
parser_dict = {}
reasoning_parser_dict = {}
async def generate_stream_resp():
is_firsts = {}
@@ -1274,15 +1300,27 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
choice_logprobs = None
finish_reason = content["meta_info"]["finish_reason"]
finish_reason_type = (
finish_reason["type"] if finish_reason else None
)
if is_first:
# First chunk with role
is_first = False
if (
tokenizer_manager.server_args.reasoning_parser
and request.separate_reasoning
):
delta = DeltaMessage(role="assistant", reasoning_content="")
else:
delta = DeltaMessage(role="assistant", content="")
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=DeltaMessage(role="assistant", content=""),
delta=delta,
finish_reason=(
finish_reason["type"] if finish_reason else ""
None
if finish_reason_type and len(finish_reason_type) == 0
else finish_reason_type
),
matched_stop=(
finish_reason["matched"]
@@ -1302,6 +1340,41 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
delta = text[len(stream_buffer) :]
new_stream_buffer = stream_buffer + delta
if (
tokenizer_manager.server_args.reasoning_parser
and request.separate_reasoning
):
if index not in reasoning_parser_dict:
reasoning_parser_dict[index] = ReasoningParser(
tokenizer_manager.server_args.reasoning_parser,
request.stream_reasoning,
)
reasoning_parser = reasoning_parser_dict[index]
reasoning_text, delta = reasoning_parser.parse_stream_chunk(
delta
)
if reasoning_text:
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=DeltaMessage(reasoning_content=reasoning_text),
finish_reason=(
None
if finish_reason_type
and len(finish_reason_type) == 0
else finish_reason_type
),
)
chunk = ChatCompletionStreamResponse(
id=content["meta_info"]["id"],
choices=[choice_data],
model=request.model,
)
yield f"data: {chunk.model_dump_json()}\n\n"
if (delta and len(delta) == 0) or not delta:
stream_buffers[index] = new_stream_buffer
is_firsts[index] = is_first
continue
if request.tool_choice != "none" and request.tools:
if index not in parser_dict:
parser_dict[index] = FunctionCallParser(
@@ -1319,7 +1392,10 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
index=index,
delta=DeltaMessage(content=normal_text),
finish_reason=(
finish_reason["type"] if finish_reason else ""
None
if finish_reason_type
and len(finish_reason_type) == 0
else finish_reason_type
),
)
chunk = ChatCompletionStreamResponse(
@@ -1388,7 +1464,9 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
index=index,
delta=DeltaMessage(content=delta),
finish_reason=(
finish_reason["type"] if finish_reason else ""
None
if finish_reason_type and len(finish_reason_type) == 0
else finish_reason_type
),
matched_stop=(
finish_reason["matched"]
@@ -1456,6 +1534,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
ret,
cache_report=tokenizer_manager.server_args.enable_cache_report,
tool_call_parser=tokenizer_manager.server_args.tool_call_parser,
reasoning_parser=tokenizer_manager.server_args.reasoning_parser,
)
return response

View File

@@ -336,6 +336,8 @@ class ChatCompletionRequest(BaseModel):
skip_special_tokens: bool = True
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
session_params: Optional[Dict] = None
separate_reasoning: bool = True
stream_reasoning: bool = True
class FunctionResponse(BaseModel):
@@ -356,6 +358,7 @@ class ToolCall(BaseModel):
class ChatMessage(BaseModel):
role: Optional[str] = None
content: Optional[str] = None
reasoning_content: Optional[str] = None
tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])
@@ -379,6 +382,7 @@ class ChatCompletionResponse(BaseModel):
class DeltaMessage(BaseModel):
role: Optional[str] = None
content: Optional[str] = None
reasoning_content: Optional[str] = None
tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])