[FEAT] JSON constrained support (#1125)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
havetc
2024-08-26 18:37:26 +02:00
committed by GitHub
parent c5fe11a8e1
commit 9935f97b3e
10 changed files with 147 additions and 3 deletions

View File

@@ -15,6 +15,8 @@ limitations under the License.
"""Cache for the compressed finite state machine."""
from outlines.fsm.json_schema import build_regex_from_schema
from sglang.srt.constrained import RegexGuide, TransformerTokenizer
from sglang.srt.constrained.base_tool_cache import BaseToolCache
@@ -26,9 +28,12 @@ class FSMCache(BaseToolCache):
tokenizer_args_dict,
enable=True,
skip_tokenizer_init=False,
json_schema_mode=False,
):
super().__init__(enable=enable)
self.json_schema_mode = json_schema_mode
if (
skip_tokenizer_init
or tokenizer_path.endswith(".json")
@@ -72,5 +77,9 @@ class FSMCache(BaseToolCache):
tokenizer_path, **tokenizer_args_dict
)
def init_value(self, regex):
    """Create the ``RegexGuide`` for ``regex`` with this cache's outlines tokenizer."""
    guide = RegexGuide(regex, self.outlines_tokenizer)
    return guide
def init_value(self, value):
    """Build the cache entry for ``value``.

    When ``self.json_schema_mode`` is set, ``value`` is passed to
    ``build_regex_from_schema`` and the result is a ``(RegexGuide, regex)``
    pair. Otherwise ``value`` is used directly as the regex and only the
    ``RegexGuide`` is returned.
    """
    if not self.json_schema_mode:
        # Plain regex mode: the value is already the pattern.
        return RegexGuide(value, self.outlines_tokenizer)

    # JSON-schema mode: derive the regex first, then return it with the guide.
    schema_regex = build_regex_from_schema(value)
    guide = RegexGuide(schema_regex, self.outlines_tokenizer)
    return guide, schema_regex

View File

@@ -23,6 +23,7 @@ from collections import defaultdict
import interegular
import outlines.caching
from outlines.fsm.json_schema import build_regex_from_schema
from sglang.srt.constrained import (
FSMInfo,