diff --git a/python/sglang/srt/constrained/base_grammar_backend.py b/python/sglang/srt/constrained/base_grammar_backend.py
index 5058cddb9..4fe5d6c77 100644
--- a/python/sglang/srt/constrained/base_grammar_backend.py
+++ b/python/sglang/srt/constrained/base_grammar_backend.py
@@ -168,7 +168,10 @@ class BaseGrammarBackend:
 
 
 def create_grammar_backend(
-    server_args: ServerArgs, tokenizer, vocab_size: int
+    server_args: ServerArgs,
+    tokenizer,
+    vocab_size: int,
+    eos_token_ids: Optional[set] = None,
 ) -> Optional[BaseGrammarBackend]:
     if server_args.grammar_backend == "outlines":
         from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend
@@ -180,7 +183,12 @@ def create_grammar_backend(
     elif server_args.grammar_backend == "xgrammar":
         from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend
 
-        grammar_backend = XGrammarGrammarBackend(tokenizer, vocab_size=vocab_size)
+        # Convert Set[int] to List[int] if needed
+        eos_list = list(eos_token_ids) if eos_token_ids else None
+
+        grammar_backend = XGrammarGrammarBackend(
+            tokenizer, vocab_size=vocab_size, model_eos_token_ids=eos_list
+        )
     elif server_args.grammar_backend == "llguidance":
         from sglang.srt.constrained.llguidance_backend import GuidanceBackend
 
diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py
index ff7caef8f..92e171662 100644
--- a/python/sglang/srt/constrained/xgrammar_backend.py
+++ b/python/sglang/srt/constrained/xgrammar_backend.py
@@ -150,14 +150,16 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
         self,
         tokenizer,
         vocab_size: int,
+        model_eos_token_ids: Optional[List[int]] = None,
     ):
         super().__init__()
 
-        if True:
-            tokenizer_info = TokenizerInfo.from_huggingface(
-                tokenizer, vocab_size=vocab_size
-            )
-            override_stop_tokens = None
+        # Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
+        # This ensures consistency between what the model considers EOS and what XGrammar uses
+        tokenizer_info = TokenizerInfo.from_huggingface(
+            tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
+        )
+        override_stop_tokens = None
 
         self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
         self.vocab_size = vocab_size
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index f3eb20cad..0be67eaca 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -458,7 +458,10 @@ class Scheduler(
         self.grammar_queue: List[Req] = []
         if not server_args.skip_tokenizer_init:
             self.grammar_backend = create_grammar_backend(
-                server_args, self.tokenizer, self.model_config.vocab_size
+                server_args,
+                self.tokenizer,
+                self.model_config.vocab_size,
+                self.model_config.hf_eos_token_id,
             )
         else:
             self.grammar_backend = None