[Performance] Support both xgrammar and outlines for constrained decoding (#1752)
This commit is contained in:
@@ -102,6 +102,7 @@ class ServerArgs:
|
||||
# Kernel backend
|
||||
attention_backend: Optional[str] = None
|
||||
sampling_backend: Optional[str] = None
|
||||
grammar_backend: Optional[str] = "outlines"
|
||||
|
||||
# Optimization/debug options
|
||||
disable_flashinfer: bool = False
|
||||
@@ -537,6 +538,13 @@ class ServerArgs:
|
||||
default=ServerArgs.sampling_backend,
|
||||
help="Choose the kernels for sampling layers.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--grammar-backend",
|
||||
type=str,
|
||||
choices=["xgrammar", "outlines"],
|
||||
default=ServerArgs.grammar_backend,
|
||||
help="Choose the backend for constrained decoding.",
|
||||
)
|
||||
|
||||
# Optimization/debug options
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user