Fix grammar backend (#2018)

2024-11-12 21:17:38 -08:00
parent 125b1199c5
commit ba069a24d3
13 changed files with 401 additions and 434 deletions
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -37,7 +37,6 @@ import torch

 from sglang.global_config import global_config
 from sglang.srt.configs.model_config import ModelConfig
-from sglang.srt.constrained.grammar import Grammar
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
 from sglang.srt.mem_cache.chunk_cache import ChunkCache
 from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool
@@ -249,7 +248,7 @@ class Req:
        self.embedding = None

        # Constrained decoding
-        self.grammar: Optional[Grammar] = None
+        self.grammar = None

        # The number of cached tokens, that were already cached in the KV cache
        self.cached_tokens = 0
@@ -359,8 +358,6 @@ class Req:
                    return

    def jump_forward_and_retokenize(self, jump_forward_str, next_state):
-        assert self.grammar is not None and self.tokenizer is not None
-
        if self.origin_input_text is None:
            # Recovering text can only use unpadded ids
            self.origin_input_text = self.tokenizer.decode(
@@ -809,9 +806,10 @@ class ScheduleBatch:

        for i, req in enumerate(self.reqs):
            if req.grammar is not None:
-                jump_helper = req.grammar.try_jump(req.tokenizer)
-                if jump_helper.can_jump():
-                    suffix_ids = jump_helper.suffix_ids
+                jump_helper = req.grammar.try_jump_forward(req.tokenizer)
+                if jump_helper:
+                    suffix_ids, _ = jump_helper
+
                    # Current ids, for cache and revert
                    cur_all_ids = tuple(req.origin_input_ids + req.output_ids)[:-1]
                    cur_output_ids = req.output_ids
@@ -827,6 +825,8 @@ class ScheduleBatch:
                        next_state,
                    ) = req.grammar.jump_forward_str_state(jump_helper)

+                    # Make the incrementally decoded text part of jump_forward_str
+                    # so that the UTF-8 will not corrupt
                    jump_forward_str = new_text + jump_forward_str
                    if not req.jump_forward_and_retokenize(
                        jump_forward_str, next_state