feat: disable grammar restrictions within reasoning sections (#4984)
Co-authored-by: tianhaoyu <thy@mail.ecust.edu.cn>
Co-authored-by: DarkSharpness <2040703891@qq.com>
This commit is contained in:
@@ -28,6 +28,18 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseGrammarObject(ABC):
|
||||
|
||||
def __init__(self):
|
||||
self._finished = False
|
||||
|
||||
@property
|
||||
def finished(self):
|
||||
return self._finished
|
||||
|
||||
@finished.setter
|
||||
def finished(self, finished):
|
||||
self._finished = finished
|
||||
|
||||
@abstractmethod
|
||||
def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]:
|
||||
"""
|
||||
@@ -59,6 +71,13 @@ class BaseGrammarObject(ABC):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def accept_token(self, token: int) -> None:
|
||||
"""
|
||||
Accept a token in the grammar.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def allocate_vocab_mask(
|
||||
self, vocab_size: int, batch_size: int, device
|
||||
@@ -90,7 +109,7 @@ class CacheEntry:
|
||||
event: Event
|
||||
|
||||
|
||||
class BaseGrammarBackend(ABC):
|
||||
class BaseGrammarBackend:
|
||||
def __init__(self):
|
||||
self.executor = ThreadPoolExecutor()
|
||||
self.cache: Dict[Tuple[str, str], CacheEntry] = {}
|
||||
@@ -107,19 +126,15 @@ class BaseGrammarBackend(ABC):
|
||||
"""
|
||||
raise ValueError(f"Invalid key_type: {key_type}={key_string}")
|
||||
|
||||
@abstractmethod
|
||||
def dispatch_json(self, key_string: str) -> Optional[BaseGrammarObject]:
|
||||
return self._not_supported("json", key_string)
|
||||
|
||||
@abstractmethod
|
||||
def dispatch_regex(self, key_string: str) -> Optional[BaseGrammarObject]:
|
||||
return self._not_supported("regex", key_string)
|
||||
|
||||
@abstractmethod
|
||||
def dispatch_ebnf(self, key_string: str) -> Optional[BaseGrammarObject]:
|
||||
return self._not_supported("ebnf", key_string)
|
||||
|
||||
@abstractmethod
|
||||
def dispatch_structural_tag(self, key_string: str) -> Optional[BaseGrammarObject]:
|
||||
return self._not_supported("structural_tag", key_string)
|
||||
|
||||
@@ -195,4 +210,10 @@ def create_grammar_backend(
|
||||
else:
|
||||
raise ValueError(f"Invalid grammar backend: {server_args.grammar_backend}")
|
||||
|
||||
if server_args.reasoning_parser and hasattr(tokenizer, "think_end_id"):
|
||||
from .reasoner_grammar_backend import ReasonerGrammarBackend
|
||||
|
||||
grammar_backend = ReasonerGrammarBackend(
|
||||
grammar_backend, tokenizer.think_end_id
|
||||
)
|
||||
return grammar_backend
|
||||
|
||||
@@ -33,6 +33,7 @@ class GuidanceGrammar(BaseGrammarObject):
|
||||
def __init__(
|
||||
self, llguidance_tokenizer: llguidance.LLTokenizer, serialized_grammar: str
|
||||
):
|
||||
super().__init__()
|
||||
self.llguidance_tokenizer = llguidance_tokenizer
|
||||
self.serialized_grammar = serialized_grammar
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ class OutlinesGrammar(BaseGrammarObject):
|
||||
guide: RegexGuide,
|
||||
jump_forward_map: Union[OutlinesJumpForwardMap, None],
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.guide = guide
|
||||
self.jump_forward_map = jump_forward_map
|
||||
self.state = 0
|
||||
|
||||
101
python/sglang/srt/constrained/reasoner_grammar_backend.py
Normal file
101
python/sglang/srt/constrained/reasoner_grammar_backend.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# Copyright 2023-2024 SGLang Team
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""The baseclass of a backend for reasoner grammar-guided constrained decoding."""
|
||||
|
||||
from concurrent.futures import Future
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
|
||||
from .base_grammar_backend import BaseGrammarBackend, BaseGrammarObject
|
||||
|
||||
|
||||
class ReasonerGrammarObject(BaseGrammarObject):
    """Grammar wrapper that suspends constrained decoding during reasoning.

    While the model is still inside its reasoning ("think") section, vocab
    masks are left untouched so sampling is unconstrained.  Once the
    end-of-think token (``think_end_id``) is accepted, every operation is
    delegated to the wrapped grammar object.
    """

    def __init__(self, grammar: BaseGrammarObject, think_end_id):
        super().__init__()
        # The real grammar that enforces structure after the reasoning ends.
        self.grammar = grammar
        # Token id that marks the end of the reasoning section.
        self.think_end_id = think_end_id
        # Decoding always starts inside the reasoning section.
        self.is_in_reasoning = True

    @property
    def finished(self):
        # Completion state mirrors the wrapped grammar.
        return self.grammar.finished

    @finished.setter
    def finished(self, finished):
        self.grammar.finished = finished

    def allocate_vocab_mask(
        self, vocab_size: int, batch_size: int, device
    ) -> torch.Tensor:
        return self.grammar.allocate_vocab_mask(vocab_size, batch_size, device)

    def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None:
        # Inside the reasoning section: leave the mask as allocated, i.e.
        # apply no grammar restriction.
        if self.is_in_reasoning:
            return
        self.grammar.fill_vocab_mask(vocab_mask, idx)

    def move_vocab_mask(self, vocab_mask: torch.Tensor, device) -> torch.Tensor:
        return self.grammar.move_vocab_mask(vocab_mask, device)

    @property
    def apply_vocab_mask(self):
        return self.grammar.apply_vocab_mask

    def accept_token(self, token: int):
        if token == self.think_end_id:
            # Transition out of the reasoning section.  The end-of-think
            # token itself is never forwarded to the wrapped grammar.
            self.is_in_reasoning = False
            return
        if not self.is_in_reasoning:
            self.grammar.accept_token(token)

    def try_jump_forward(self, tokenizer):
        return self.grammar.try_jump_forward(tokenizer)

    def jump_forward_str_state(self, helper):
        return self.grammar.jump_forward_str_state(helper)

    def jump_and_retokenize(
        self, old_output_ids: List[int], new_output_ids: List[int], next_state: int
    ):
        return self.grammar.jump_and_retokenize(
            old_output_ids, new_output_ids, next_state
        )

    def copy(self) -> BaseGrammarObject:
        # NOTE(review): the copy re-enters reasoning mode (is_in_reasoning
        # resets to True via __init__) — confirm copies are only taken at
        # request start, before any tokens are accepted.
        return ReasonerGrammarObject(self.grammar.copy(), self.think_end_id)
|
||||
|
||||
|
||||
class ReasonerGrammarBackend(BaseGrammarBackend):
    """Backend decorator: wraps grammars produced by an inner backend.

    Every grammar object returned by ``grammar_backend`` is wrapped in a
    :class:`ReasonerGrammarObject` so that grammar constraints only take
    effect after the reasoning section ends.
    """

    def __init__(self, grammar_backend: BaseGrammarBackend, think_end_id):
        # NOTE(review): super().__init__() is intentionally not called here;
        # caching/compilation is owned by the wrapped backend — confirm no
        # caller touches the base class's cache/executor on this wrapper.
        self.grammar_backend = grammar_backend
        self.think_end_id = think_end_id

    def get_cached_value(self, key: Tuple[str, str]) -> Optional[ReasonerGrammarObject]:
        inner = self.grammar_backend.get_cached_value(key)
        if inner:
            return ReasonerGrammarObject(inner, self.think_end_id)
        return None

    def get_future_value(self, key: Tuple[str, str]) -> Future:
        wrapped: Future = Future()

        def _on_done(inner_future: Future):
            # Propagate the inner result, wrapping truthy grammars and
            # passing through falsy (failed/unsupported) results as None.
            result = inner_future.result()
            wrapped.set_result(
                ReasonerGrammarObject(result, self.think_end_id) if result else None
            )

        self.grammar_backend.get_future_value(key).add_done_callback(_on_done)
        return wrapped

    def reset(self):
        self.grammar_backend.reset()
|
||||
@@ -48,6 +48,7 @@ class XGrammarGrammar(BaseGrammarObject):
|
||||
ctx: CompiledGrammar,
|
||||
override_stop_tokens: Optional[Union[List[int], int]],
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.matcher = matcher
|
||||
self.vocab_size = vocab_size
|
||||
self.ctx = ctx
|
||||
|
||||
@@ -113,6 +113,7 @@ from sglang.srt.mem_cache.hiradix_cache import HiRadixCache
|
||||
from sglang.srt.mem_cache.radix_cache import RadixCache
|
||||
from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
|
||||
from sglang.srt.model_executor.forward_batch_info import ForwardMode
|
||||
from sglang.srt.reasoning_parser import ReasoningParser
|
||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
|
||||
@@ -232,6 +233,15 @@ class Scheduler(
|
||||
# Init tokenizer
|
||||
self.init_tokenizer()
|
||||
|
||||
# Set reasoning_parser and think_end_id if --reasoning_parser is enabled
|
||||
if self.server_args.reasoning_parser and self.tokenizer:
|
||||
reasoning_parser = ReasoningParser(
|
||||
model_type=self.server_args.reasoning_parser, stream_reasoning=False
|
||||
)
|
||||
self.tokenizer.think_end_id = self.tokenizer.encode(
|
||||
reasoning_parser.detector.think_end_token, add_special_tokens=False
|
||||
)[0]
|
||||
|
||||
# Check whether overlap can be enabled
|
||||
if not self.is_generation:
|
||||
self.enable_overlap = False
|
||||
|
||||
Reference in New Issue
Block a user