From 218ab3611ddf46ce6acf8a465611a01faa275eb7 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 13 Nov 2024 11:39:16 -0800 Subject: [PATCH] Do not let invalid grammar crash the server (#2023) --- .../srt/constrained/base_grammar_backend.py | 5 +++-- .../srt/constrained/outlines_backend.py | 14 ++++++++++---- .../srt/constrained/xgrammar_backend.py | 19 +++++++++++++++++-- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/constrained/base_grammar_backend.py b/python/sglang/srt/constrained/base_grammar_backend.py index e298b3d0c..d1192685e 100644 --- a/python/sglang/srt/constrained/base_grammar_backend.py +++ b/python/sglang/srt/constrained/base_grammar_backend.py @@ -52,7 +52,7 @@ class BaseGrammarBackend: else: entry.value = self.init_value_impl(key) entry.event.set() - return entry.value.copy() + return entry.value.copy() if entry.value else None def init_value_impl(self, key: Tuple[str, str]) -> BaseGrammarObject: raise NotImplementedError() @@ -62,7 +62,8 @@ class BaseGrammarBackend: entry = self.cache.get(key) if not entry or not entry.event.is_set(): return None - return self.cache[key].value.copy() + val = self.cache[key].value + return val.copy() if val else None def get_future_value(self, key: Tuple[str, str]) -> Future: return self.executor.submit(self.init_value, key) diff --git a/python/sglang/srt/constrained/outlines_backend.py b/python/sglang/srt/constrained/outlines_backend.py index 16f32d93f..4999e8dbd 100644 --- a/python/sglang/srt/constrained/outlines_backend.py +++ b/python/sglang/srt/constrained/outlines_backend.py @@ -19,6 +19,7 @@ import json import logging from typing import Dict, List, Optional, Tuple, Union +import interegular import torch from outlines.fsm.guide import RegexGuide from outlines.models.transformers import TransformerTokenizer @@ -147,17 +148,22 @@ class OutlinesGrammarBackend(BaseGrammarBackend): key_string, whitespace_pattern=self.whitespace_pattern, ) - except NotImplementedError as e: + except (NotImplementedError, json.decoder.JSONDecodeError) as e: logger.warning( - f"skip invalid json schema: json_schema={key_string}, {e=}" + f"Skip invalid json_schema: json_schema={key_string}, {e=}" ) - return None, key_string + return None elif key_type == "regex": regex = key_string else: raise ValueError(f"Invalid key_type: {key_type}") - guide = RegexGuide(regex, self.outlines_tokenizer) + try: + guide = RegexGuide(regex, self.outlines_tokenizer) + except interegular.patterns.InvalidSyntax as e: + logger.warning(f"skip invalid regex schema: {regex=}, {e=}") + return None + if self.allow_jump_forward: jump_forward_map = OutlinesJumpForwardMap(regex) else: diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py index d0416ec3d..c36ae00b4 100644 --- a/python/sglang/srt/constrained/xgrammar_backend.py +++ b/python/sglang/srt/constrained/xgrammar_backend.py @@ -15,6 +15,7 @@ limitations under the License. """Constrained decoding with xgrammar backend.""" +import logging from typing import List, Tuple import torch @@ -25,6 +26,9 @@ from sglang.srt.constrained.base_grammar_backend import ( BaseGrammarObject, ) +logger = logging.getLogger(__name__) + + MAX_ROLLBACK_TOKENS = 10 @@ -97,9 +101,20 @@ class XGrammarGrammarBackend(BaseGrammarBackend): def init_value_impl(self, key: Tuple[str, str]) -> XGrammarGrammar: key_type, key_string = key if key_type == "json": - ctx = self.grammar_cache.get_compiled_grammar_for_json_schema(key_string) + try: + ctx = self.grammar_cache.get_compiled_grammar_for_json_schema( + key_string + ) + except RuntimeError as e: + logging.warning( + f"Skip invalid json_schema: json_schema={key_string}, {e=}" + ) + return None elif key_type == "regex": - raise ValueError("regex hasn't been supported by xgrammar yet") + logger.warning( + "regex hasn't been supported by xgrammar yet. This is skipped." + ) + return None else: raise ValueError(f"Invalid key_type: {key_type}")