Sanity check to prevent performance regression (#3171)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
Zhiqiang Xie
2025-01-27 12:28:17 -08:00
committed by GitHub
parent cf142b6eb8
commit 08104b56de
5 changed files with 60 additions and 4 deletions

View File

@@ -163,6 +163,7 @@ class ServerArgs:
# Custom logit processor
enable_custom_logit_processor: bool = False
# Name of the parser used for tool-call interactions; None when unspecified.
# NOTE(review): annotated as `str` but defaults to None — presumably meant to
# be Optional[str]; confirm before tightening the annotation.
tool_call_parser: str = None
# Opt-in switch for the hierarchical cache; off by default.
enable_hierarchical_cache: bool = False
def __post_init__(self):
# Set missing default values
@@ -892,6 +893,11 @@ class ServerArgs:
default=ServerArgs.tool_call_parser,
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
)
# Boolean opt-in flag: with action="store_true", argparse stores True when
# the flag is present on the command line and False otherwise.
parser.add_argument(
"--enable-hierarchical-cache",
action="store_true",
help="Enable hierarchical cache",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):