Simplify the NaN detection and greedy check in sampler (#1709)

This commit is contained in:
Lianmin Zheng
2024-10-18 20:21:24 -07:00
committed by GitHub
parent 2bcfba1b08
commit f0f8a7699b
6 changed files with 24 additions and 7 deletions

View File

@@ -114,6 +114,7 @@ class ServerArgs:
disable_custom_all_reduce: bool = False
disable_mla: bool = False
disable_penalizer: bool = False
disable_nan_detection: bool = False
enable_overlap_schedule: bool = False
enable_mixed_chunk: bool = False
enable_torch_compile: bool = False
@@ -577,7 +578,12 @@ class ServerArgs:
parser.add_argument(
"--disable-penalizer",
action="store_true",
help="Disable the logit penalizer (e.g., frequency and repetition penalty).",
help="Disable the logit penalizers (e.g., frequency and repetition penalty) for better performance if they are not used in any requests.",
)
parser.add_argument(
"--disable-nan-detection",
action="store_true",
help="Disable the NaN detection for better performance.",
)
parser.add_argument(
"--enable-overlap-schedule",