Constraint Decoding: Set xgrammar as the default grammar backend (#4386)
This commit is contained in:
@@ -38,7 +38,7 @@ runtime_common = [
|
||||
"transformers==4.48.3",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"xgrammar==0.1.15",
|
||||
"xgrammar==0.1.16",
|
||||
]
|
||||
|
||||
srt = [
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
import time
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, root_validator
|
||||
from typing_extensions import Literal
|
||||
|
||||
|
||||
@@ -323,6 +323,15 @@ class ChatCompletionRequest(BaseModel):
|
||||
default="auto", examples=["none"]
|
||||
) # noqa
|
||||
|
||||
@root_validator(pre=True)
def set_tool_choice_default(cls, values):
    """Fill in ``tool_choice`` when the incoming request leaves it unset.

    Registered as a ``pre=True`` root validator, so it receives the raw
    input mapping. If ``tool_choice`` is absent or ``None`` it is set to
    ``"none"`` when ``tools`` is absent or ``None``, and to ``"auto"``
    otherwise. An explicitly provided ``tool_choice`` is left untouched.

    Args:
        values: Mapping of raw request fields; updated in place.

    Returns:
        The same ``values`` mapping.
    """
    # An explicit choice from the caller always wins.
    if values.get("tool_choice") is not None:
        return values

    tools_provided = values.get("tools") is not None
    values["tool_choice"] = "auto" if tools_provided else "none"
    return values
|
||||
|
||||
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
||||
top_k: int = -1
|
||||
min_p: float = 0.0
|
||||
|
||||
@@ -125,7 +125,7 @@ class ServerArgs:
|
||||
# Kernel backend
|
||||
attention_backend: Optional[str] = None
|
||||
sampling_backend: Optional[str] = None
|
||||
grammar_backend: Optional[str] = "outlines"
|
||||
grammar_backend: Optional[str] = "xgrammar"
|
||||
|
||||
# Speculative decoding
|
||||
speculative_algorithm: Optional[str] = None
|
||||
|
||||
Reference in New Issue
Block a user