bugfix: Fix XGrammar backend to use model's EOS tokens for constrained generation (#8422)

This commit is contained in:
Chang Su
2025-07-27 19:01:02 -07:00
committed by GitHub
parent bb81daefb8
commit dd487e5553
3 changed files with 21 additions and 8 deletions

View File

@@ -168,7 +168,10 @@ class BaseGrammarBackend:
 def create_grammar_backend(
-    server_args: ServerArgs, tokenizer, vocab_size: int
+    server_args: ServerArgs,
+    tokenizer,
+    vocab_size: int,
+    eos_token_ids: Optional[set] = None,
 ) -> Optional[BaseGrammarBackend]:
     if server_args.grammar_backend == "outlines":
         from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend
@@ -180,7 +183,12 @@ def create_grammar_backend(
     elif server_args.grammar_backend == "xgrammar":
         from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend

-        grammar_backend = XGrammarGrammarBackend(tokenizer, vocab_size=vocab_size)
+        # Convert Set[int] to List[int] if needed
+        eos_list = list(eos_token_ids) if eos_token_ids else None
+        grammar_backend = XGrammarGrammarBackend(
+            tokenizer, vocab_size=vocab_size, model_eos_token_ids=eos_list
+        )
     elif server_args.grammar_backend == "llguidance":
         from sglang.srt.constrained.llguidance_backend import GuidanceBackend

View File

@@ -150,14 +150,16 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
         self,
         tokenizer,
         vocab_size: int,
+        model_eos_token_ids: Optional[List[int]] = None,
     ):
         super().__init__()
-        if True:
-            tokenizer_info = TokenizerInfo.from_huggingface(
-                tokenizer, vocab_size=vocab_size
-            )
-            override_stop_tokens = None
+        # Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
+        # This ensures consistency between what the model considers EOS and what XGrammar uses
+        tokenizer_info = TokenizerInfo.from_huggingface(
+            tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
+        )
+        override_stop_tokens = None
         self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
         self.vocab_size = vocab_size

View File

@@ -458,7 +458,10 @@ class Scheduler(
         self.grammar_queue: List[Req] = []
         if not server_args.skip_tokenizer_init:
             self.grammar_backend = create_grammar_backend(
-                server_args, self.tokenizer, self.model_config.vocab_size
+                server_args,
+                self.tokenizer,
+                self.model_config.vocab_size,
+                self.model_config.hf_eos_token_id,
             )
         else:
             self.grammar_backend = None