diff --git a/python/sglang/srt/constrained/base_grammar_backend.py b/python/sglang/srt/constrained/base_grammar_backend.py index d3be81ef7..dda3fab4f 100644 --- a/python/sglang/srt/constrained/base_grammar_backend.py +++ b/python/sglang/srt/constrained/base_grammar_backend.py @@ -224,13 +224,17 @@ def create_grammar_backend( eos_list = list(eos_token_ids) if eos_token_ids else None grammar_backend = XGrammarGrammarBackend( - tokenizer, vocab_size=vocab_size, model_eos_token_ids=eos_list + tokenizer, + vocab_size=vocab_size, + model_eos_token_ids=eos_list, + any_whitespace=not server_args.constrained_json_disable_any_whitespace, ) elif name == "llguidance": from sglang.srt.constrained.llguidance_backend import GuidanceBackend grammar_backend = GuidanceBackend( tokenizer=tokenizer, + any_whitespace=not server_args.constrained_json_disable_any_whitespace, whitespace_pattern=server_args.constrained_json_whitespace_pattern, ) elif name == "none": diff --git a/python/sglang/srt/constrained/llguidance_backend.py b/python/sglang/srt/constrained/llguidance_backend.py index 5e29c2524..dc34a353d 100644 --- a/python/sglang/srt/constrained/llguidance_backend.py +++ b/python/sglang/srt/constrained/llguidance_backend.py @@ -110,12 +110,14 @@ class GuidanceBackend(BaseGrammarBackend): def __init__( self, tokenizer, + any_whitespace: bool = True, whitespace_pattern: Optional[str] = None, n_vocab: Optional[int] = None, ): super().__init__() self.tokenizer = tokenizer + self.any_whitespace = any_whitespace self.whitespace_pattern = whitespace_pattern self.llguidance_tokenizer = from_tokenizer(self.tokenizer, n_vocab) @@ -134,6 +136,7 @@ class GuidanceBackend(BaseGrammarBackend): serialized_grammar = LLMatcher.grammar_from_json_schema( key_string, defaults={ + "whitespace_flexible": self.any_whitespace, "whitespace_pattern": self.whitespace_pattern, }, ) diff --git a/python/sglang/srt/constrained/outlines_backend.py b/python/sglang/srt/constrained/outlines_backend.py index b54e34b3d..28831ab86 100644 --- a/python/sglang/srt/constrained/outlines_backend.py +++ b/python/sglang/srt/constrained/outlines_backend.py @@ -115,7 +115,7 @@ class OutlinesGrammarBackend(BaseGrammarBackend): def __init__( self, tokenizer, - whitespace_pattern: bool, + whitespace_pattern: str | None, ): super().__init__() diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py index 3df788970..00b54baef 100644 --- a/python/sglang/srt/constrained/xgrammar_backend.py +++ b/python/sglang/srt/constrained/xgrammar_backend.py @@ -167,6 +167,7 @@ class XGrammarGrammarBackend(BaseGrammarBackend): tokenizer, vocab_size: int, model_eos_token_ids: Optional[List[int]] = None, + any_whitespace: bool = True, ): super().__init__() @@ -188,6 +189,7 @@ class XGrammarGrammarBackend(BaseGrammarBackend): self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info) self.vocab_size = vocab_size self.override_stop_tokens = override_stop_tokens + self.any_whitespace = any_whitespace def _from_context( self, ctx: CompiledGrammar, key_string: str, grammar_stats: GrammarStats @@ -212,7 +214,9 @@ class XGrammarGrammarBackend(BaseGrammarBackend): # Note: This builtin JSON grammar includes *all* valid JSON (including, for example, arrays at the root) ctx = self.grammar_compiler.compile_builtin_json_grammar() else: - ctx = self.grammar_compiler.compile_json_schema(schema=key_string) + ctx = self.grammar_compiler.compile_json_schema( + schema=key_string, any_whitespace=self.any_whitespace + ) except (RuntimeError, json.decoder.JSONDecodeError) as e: logging.error(f"Hit invalid json_schema: {key_string=}, {e=}") diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index c7e80a2b9..67d0a1807 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -227,6 +227,7 @@ class ServerArgs: stream_output: bool = False random_seed: Optional[int] = None constrained_json_whitespace_pattern: Optional[str] = None + constrained_json_disable_any_whitespace: bool = False watchdog_timeout: float = 300 dist_timeout: Optional[int] = None # timeout for torch.distributed download_dir: Optional[str] = None @@ -1683,7 +1684,12 @@ class ServerArgs: "--constrained-json-whitespace-pattern", type=str, default=ServerArgs.constrained_json_whitespace_pattern, - help="(outlines backend only) Regex pattern for syntactic whitespaces allowed in JSON constrained output. For example, to allow the model generate consecutive whitespaces, set the pattern to [\n\t ]*", + help="(outlines and llguidance backends only) Regex pattern for syntactic whitespaces allowed in JSON constrained output. For example, to allow the model generate consecutive whitespaces, set the pattern to [\n\t ]*", + ) + parser.add_argument( + "--constrained-json-disable-any-whitespace", + action="store_true", + help="(xgrammar and llguidance backends only) Enforce compact representation in JSON constrained output.", ) parser.add_argument( "--watchdog-timeout",