Fix grammar backend (#2018)

This commit is contained in:
Lianmin Zheng
2024-11-12 21:17:38 -08:00
committed by GitHub
parent 125b1199c5
commit ba069a24d3
13 changed files with 401 additions and 434 deletions

View File

@@ -111,7 +111,7 @@ class ServerArgs:
disable_flashinfer: bool = False
disable_flashinfer_sampling: bool = False
disable_radix_cache: bool = False
-disable_regex_jump_forward: bool = False
+disable_jump_forward: bool = False
disable_cuda_graph: bool = False
disable_cuda_graph_padding: bool = False
disable_disk_cache: bool = False
@@ -574,7 +574,7 @@ class ServerArgs:
type=str,
choices=["xgrammar", "outlines"],
default=ServerArgs.grammar_backend,
-help="Choose the backend for constrained decoding.",
+help="Choose the backend for grammar-guided decoding.",
)
# Optimization/debug options
@@ -594,9 +594,9 @@ class ServerArgs:
help="Disable RadixAttention for prefix caching.",
)
parser.add_argument(
-"--disable-regex-jump-forward",
+"--disable-jump-forward",
action="store_true",
-help="Disable regex jump-forward.",
+help="Disable jump-forward for grammar-guided decoding.",
)
parser.add_argument(
"--disable-cuda-graph",
@@ -616,7 +616,6 @@ class ServerArgs:
parser.add_argument(
"--disable-custom-all-reduce",
action="store_true",
-default=False,
help="Disable the custom all-reduce kernel and fall back to NCCL.",
)
parser.add_argument(
@@ -688,7 +687,6 @@ class ServerArgs:
)
parser.add_argument(
"--delete-ckpt-after-loading",
-default=ServerArgs.delete_ckpt_after_loading,
action="store_true",
help="Delete the model checkpoint after loading the model.",
)