[Frontend] Fix request length check and add option to disallow auto truncation in scheduler (#2876)
This commit is contained in:
@@ -157,6 +157,7 @@ class ServerArgs:
     num_continuous_decode_steps: int = 1
     delete_ckpt_after_loading: bool = False
     enable_memory_saver: bool = False
+    allow_auto_truncate: bool = False
 
     def __post_init__(self):
         # Set missing default values
@@ -859,6 +860,11 @@ class ServerArgs:
             action="store_true",
             help="Allow saving memory using release_memory_occupation and resume_memory_occupation",
         )
+        parser.add_argument(
+            "--allow-auto-truncate",
+            action="store_true",
+            help="Allow automatically truncating requests that exceed the maximum input length instead of returning an error.",
+        )
 
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
Reference in New Issue
Block a user