bugfix: Fix XGrammar backend to use model's EOS tokens for constrained generation (#8422)
This commit is contained in:
@@ -168,7 +168,10 @@ class BaseGrammarBackend:
|
||||
|
||||
|
||||
def create_grammar_backend(
|
||||
server_args: ServerArgs, tokenizer, vocab_size: int
|
||||
server_args: ServerArgs,
|
||||
tokenizer,
|
||||
vocab_size: int,
|
||||
eos_token_ids: Optional[set] = None,
|
||||
) -> Optional[BaseGrammarBackend]:
|
||||
if server_args.grammar_backend == "outlines":
|
||||
from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend
|
||||
@@ -180,7 +183,12 @@ def create_grammar_backend(
|
||||
elif server_args.grammar_backend == "xgrammar":
|
||||
from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend
|
||||
|
||||
grammar_backend = XGrammarGrammarBackend(tokenizer, vocab_size=vocab_size)
|
||||
# Convert Set[int] to List[int] if needed
|
||||
eos_list = list(eos_token_ids) if eos_token_ids else None
|
||||
|
||||
grammar_backend = XGrammarGrammarBackend(
|
||||
tokenizer, vocab_size=vocab_size, model_eos_token_ids=eos_list
|
||||
)
|
||||
elif server_args.grammar_backend == "llguidance":
|
||||
from sglang.srt.constrained.llguidance_backend import GuidanceBackend
|
||||
|
||||
|
||||
@@ -150,14 +150,16 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
|
||||
self,
|
||||
tokenizer,
|
||||
vocab_size: int,
|
||||
model_eos_token_ids: Optional[List[int]] = None,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
if True:
|
||||
tokenizer_info = TokenizerInfo.from_huggingface(
|
||||
tokenizer, vocab_size=vocab_size
|
||||
)
|
||||
override_stop_tokens = None
|
||||
# Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
|
||||
# This ensures consistency between what the model considers EOS and what XGrammar uses
|
||||
tokenizer_info = TokenizerInfo.from_huggingface(
|
||||
tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
|
||||
)
|
||||
override_stop_tokens = None
|
||||
|
||||
self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
|
||||
self.vocab_size = vocab_size
|
||||
|
||||
@@ -458,7 +458,10 @@ class Scheduler(
|
||||
self.grammar_queue: List[Req] = []
|
||||
if not server_args.skip_tokenizer_init:
|
||||
self.grammar_backend = create_grammar_backend(
|
||||
server_args, self.tokenizer, self.model_config.vocab_size
|
||||
server_args,
|
||||
self.tokenizer,
|
||||
self.model_config.vocab_size,
|
||||
self.model_config.hf_eos_token_id,
|
||||
)
|
||||
else:
|
||||
self.grammar_backend = None
|
||||
|
||||
Reference in New Issue
Block a user