[Performance] Support both xgrammar and outlines for constrained decoding (#1752)

This commit is contained in:
DarkSharpness
2024-10-26 06:47:02 +09:00
committed by GitHub
parent 30643fed7f
commit b77a02cdfd
7 changed files with 325 additions and 77 deletions

View File

@@ -102,6 +102,7 @@ class ServerArgs:
# Kernel backend
attention_backend: Optional[str] = None
sampling_backend: Optional[str] = None
# Backend used for constrained decoding; the --grammar-backend CLI flag
# accepts "xgrammar" or "outlines".
grammar_backend: Optional[str] = "outlines"
# Optimization/debug options
disable_flashinfer: bool = False
@@ -537,6 +538,13 @@ class ServerArgs:
default=ServerArgs.sampling_backend,
help="Choose the kernels for sampling layers.",
)
# Register the constrained-decoding backend flag. argparse rejects any value
# outside `choices`; the default is read from the ServerArgs.grammar_backend
# class attribute so the CLI and the dataclass stay in sync.
parser.add_argument(
"--grammar-backend",
type=str,
choices=["xgrammar", "outlines"],
default=ServerArgs.grammar_backend,
help="Choose the backend for constrained decoding.",
)
# Optimization/debug options
parser.add_argument(