Sanity check to prevent performance regression (#3171)
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
@@ -163,6 +163,7 @@ class ServerArgs:
|
||||
# Custom logit processor
|
||||
enable_custom_logit_processor: bool = False
|
||||
tool_call_parser: str = None
|
||||
enable_hierarchical_cache: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
# Set missing default values
|
||||
@@ -892,6 +893,11 @@ class ServerArgs:
|
||||
default=ServerArgs.tool_call_parser,
|
||||
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-hierarchical-cache",
|
||||
action="store_true",
|
||||
help="Enable hierarchical cache",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_cli_args(cls, args: argparse.Namespace):
|
||||
|
||||
Reference in New Issue
Block a user