bugfix: Fix XGrammar backend to use model's EOS tokens for constrained generation (#8422)
This commit is contained in:
@@ -168,7 +168,10 @@ class BaseGrammarBackend:
|
|||||||
|
|
||||||
|
|
||||||
def create_grammar_backend(
|
def create_grammar_backend(
|
||||||
server_args: ServerArgs, tokenizer, vocab_size: int
|
server_args: ServerArgs,
|
||||||
|
tokenizer,
|
||||||
|
vocab_size: int,
|
||||||
|
eos_token_ids: Optional[set] = None,
|
||||||
) -> Optional[BaseGrammarBackend]:
|
) -> Optional[BaseGrammarBackend]:
|
||||||
if server_args.grammar_backend == "outlines":
|
if server_args.grammar_backend == "outlines":
|
||||||
from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend
|
from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend
|
||||||
@@ -180,7 +183,12 @@ def create_grammar_backend(
|
|||||||
elif server_args.grammar_backend == "xgrammar":
|
elif server_args.grammar_backend == "xgrammar":
|
||||||
from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend
|
from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend
|
||||||
|
|
||||||
grammar_backend = XGrammarGrammarBackend(tokenizer, vocab_size=vocab_size)
|
# Convert Set[int] to List[int] if needed
|
||||||
|
eos_list = list(eos_token_ids) if eos_token_ids else None
|
||||||
|
|
||||||
|
grammar_backend = XGrammarGrammarBackend(
|
||||||
|
tokenizer, vocab_size=vocab_size, model_eos_token_ids=eos_list
|
||||||
|
)
|
||||||
elif server_args.grammar_backend == "llguidance":
|
elif server_args.grammar_backend == "llguidance":
|
||||||
from sglang.srt.constrained.llguidance_backend import GuidanceBackend
|
from sglang.srt.constrained.llguidance_backend import GuidanceBackend
|
||||||
|
|
||||||
|
|||||||
@@ -150,14 +150,16 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
|
|||||||
self,
|
self,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
vocab_size: int,
|
vocab_size: int,
|
||||||
|
model_eos_token_ids: Optional[List[int]] = None,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
if True:
|
# Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
|
||||||
tokenizer_info = TokenizerInfo.from_huggingface(
|
# This ensures consistency between what the model considers EOS and what XGrammar uses
|
||||||
tokenizer, vocab_size=vocab_size
|
tokenizer_info = TokenizerInfo.from_huggingface(
|
||||||
)
|
tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
|
||||||
override_stop_tokens = None
|
)
|
||||||
|
override_stop_tokens = None
|
||||||
|
|
||||||
self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
|
self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = vocab_size
|
||||||
|
|||||||
@@ -458,7 +458,10 @@ class Scheduler(
|
|||||||
self.grammar_queue: List[Req] = []
|
self.grammar_queue: List[Req] = []
|
||||||
if not server_args.skip_tokenizer_init:
|
if not server_args.skip_tokenizer_init:
|
||||||
self.grammar_backend = create_grammar_backend(
|
self.grammar_backend = create_grammar_backend(
|
||||||
server_args, self.tokenizer, self.model_config.vocab_size
|
server_args,
|
||||||
|
self.tokenizer,
|
||||||
|
self.model_config.vocab_size,
|
||||||
|
self.model_config.hf_eos_token_id,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.grammar_backend = None
|
self.grammar_backend = None
|
||||||
|
|||||||
Reference in New Issue
Block a user