Misc fixes for eagle (flush_cache, CPU overhead) (#3014)

This commit is contained in:
Lianmin Zheng
2025-01-20 20:25:13 -08:00
parent d2571dd5c7
commit 287d07a669
11 changed files with 133 additions and 96 deletions

View File

@@ -49,12 +49,13 @@ class BenchArgs:
gsp_system_prompt_len: int = 2048
gsp_question_len: int = 128
gsp_output_len: int = 256
seed: int = 1
disable_ignore_eos: bool = False
extra_request_body: Optional[str] = None
seed: int = 1
apply_chat_template: bool = False
profile: bool = False
skip_warmup: bool = False
do_not_exit: bool = False
profile: bool = False
@staticmethod
def add_cli_args(parser: argparse.ArgumentParser):
@@ -141,20 +142,31 @@ class BenchArgs:
default=BenchArgs.gsp_output_len,
help="Target length in tokens for outputs in generated-shared-prefix dataset",
)
parser.add_argument("--seed", type=int, default=1, help="The random seed.")
parser.add_argument(
"--disable-ignore-eos",
type=bool,
default=BenchArgs.disable_ignore_eos,
action="store_true",
help="Disable ignore EOS token",
)
parser.add_argument(
"--extra-request-body",
metavar='{"key1": "value1", "key2": "value2"}',
type=str,
default=BenchArgs.extra_request_body,
help="Append given JSON object to the request payload. You can use this to specify"
"additional generate params like sampling params.",
)
parser.add_argument("--seed", type=int, default=1, help="The random seed.")
parser.add_argument(
"--apply-chat-template",
action="store_true",
help="Apply chat template",
)
parser.add_argument(
"--profile",
action="store_true",
help="Use Torch Profiler. The endpoint must be launched with "
"SGLANG_TORCH_PROFILER_DIR to enable profiler.",
)
parser.add_argument(
"--skip-warmup",
action="store_true",
@@ -165,12 +177,6 @@ class BenchArgs:
action="store_true",
help="Do not exit the program. This is useful for nsys profile with --duration and --delay.",
)
parser.add_argument(
"--profile",
action="store_true",
help="Use Torch Profiler. The endpoint must be launched with "
"SGLANG_TORCH_PROFILER_DIR to enable profiler.",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):