Fix grammar backend (#2018)

2024-11-12 21:17:38 -08:00
parent 125b1199c5
commit ba069a24d3
13 changed files with 401 additions and 434 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -111,7 +111,7 @@ class ServerArgs:
    disable_flashinfer: bool = False
    disable_flashinfer_sampling: bool = False
    disable_radix_cache: bool = False
-    disable_regex_jump_forward: bool = False
+    disable_jump_forward: bool = False
    disable_cuda_graph: bool = False
    disable_cuda_graph_padding: bool = False
    disable_disk_cache: bool = False
@@ -574,7 +574,7 @@ class ServerArgs:
            type=str,
            choices=["xgrammar", "outlines"],
            default=ServerArgs.grammar_backend,
-            help="Choose the backend for constrained decoding.",
+            help="Choose the backend for grammar-guided decoding.",
        )

        # Optimization/debug options
@@ -594,9 +594,9 @@ class ServerArgs:
            help="Disable RadixAttention for prefix caching.",
        )
        parser.add_argument(
-            "--disable-regex-jump-forward",
+            "--disable-jump-forward",
            action="store_true",
-            help="Disable regex jump-forward.",
+            help="Disable jump-forward for grammar-guided decoding.",
        )
        parser.add_argument(
            "--disable-cuda-graph",
@@ -616,7 +616,6 @@ class ServerArgs:
        parser.add_argument(
            "--disable-custom-all-reduce",
            action="store_true",
-            default=False,
            help="Disable the custom all-reduce kernel and fall back to NCCL.",
        )
        parser.add_argument(
@@ -688,7 +687,6 @@ class ServerArgs:
        )
        parser.add_argument(
            "--delete-ckpt-after-loading",
-            default=ServerArgs.delete_ckpt_after_loading,
            action="store_true",
            help="Delete the model checkpoint after loading the model.",
        )