[Frontend] Fix request length check and add option to disallow auto truncation in scheduler (#2876)

This commit is contained in:
Chang Su
2025-01-16 14:51:19 -08:00
committed by GitHub
parent 0427416b59
commit a8ccacc8b8
6 changed files with 154 additions and 17 deletions

View File

@@ -157,6 +157,7 @@ class ServerArgs:
num_continuous_decode_steps: int = 1
delete_ckpt_after_loading: bool = False
enable_memory_saver: bool = False
# Opt-in switch, off by default. Per the --allow-auto-truncate CLI help text,
# enabling it lets requests that exceed the maximum input length be truncated
# automatically instead of being answered with an error.
allow_auto_truncate: bool = False
def __post_init__(self):
# Set missing default values
@@ -859,6 +860,11 @@ class ServerArgs:
action="store_true",
help="Allow saving memory using release_memory_occupation and resume_memory_occupation",
)
# Boolean CLI switch: with action="store_true" the parsed value is False unless
# the flag is passed. argparse derives the destination name
# `allow_auto_truncate` from the option string (dashes become underscores).
parser.add_argument(
"--allow-auto-truncate",
action="store_true",
help="Allow automatically truncating requests that exceed the maximum input length instead of returning an error.",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):