Reasoning parser (#4000)

Co-authored-by: Lucas Pickup <lupickup@microsoft.com>
This commit is contained in:
Xihuai Wang
2025-03-04 13:16:36 +08:00
committed by GitHub
parent 11eea69e70
commit 95575aa76a
12 changed files with 1047 additions and 7 deletions

View File

@@ -23,6 +23,7 @@ from typing import List, Optional
import torch
from sglang.srt.hf_transformers_utils import check_gguf_file
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.utils import (
get_amdgpu_memory_capacity,
get_hpu_memory_capacity,
@@ -97,6 +98,7 @@ class ServerArgs:
api_key: Optional[str] = None
file_storage_path: str = "sglang_storage"
enable_cache_report: bool = False
reasoning_parser: Optional[str] = None
# Data parallelism
dp_size: int = 1
@@ -631,6 +633,13 @@ class ServerArgs:
action="store_true",
help="Return number of cached tokens in usage.prompt_tokens_details for each openai request.",
)
# Register the --reasoning-parser CLI option. Accepted values are limited to
# the keys of ReasoningParser.DetectorMap (argparse rejects anything else),
# and the default is taken from the ServerArgs.reasoning_parser class
# attribute so the CLI and the class declaration stay in sync.
parser.add_argument(
"--reasoning-parser",
type=str,
choices=list(ReasoningParser.DetectorMap.keys()),
default=ServerArgs.reasoning_parser,
help=f"Specify the parser for reasoning models, supported parsers are: {list(ReasoningParser.DetectorMap.keys())}.",
)
# Data parallelism
parser.add_argument(