Fix grammar backend (#2018)
This commit is contained in:
@@ -37,7 +37,6 @@ import torch
|
||||
|
||||
from sglang.global_config import global_config
|
||||
from sglang.srt.configs.model_config import ModelConfig
|
||||
from sglang.srt.constrained.grammar import Grammar
|
||||
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
|
||||
from sglang.srt.mem_cache.chunk_cache import ChunkCache
|
||||
from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool
|
||||
@@ -249,7 +248,7 @@ class Req:
|
||||
self.embedding = None
|
||||
|
||||
# Constrained decoding
|
||||
self.grammar: Optional[Grammar] = None
|
||||
self.grammar = None
|
||||
|
||||
# The number of cached tokens, that were already cached in the KV cache
|
||||
self.cached_tokens = 0
|
||||
@@ -359,8 +358,6 @@ class Req:
|
||||
return
|
||||
|
||||
def jump_forward_and_retokenize(self, jump_forward_str, next_state):
|
||||
assert self.grammar is not None and self.tokenizer is not None
|
||||
|
||||
if self.origin_input_text is None:
|
||||
# Recovering text can only use unpadded ids
|
||||
self.origin_input_text = self.tokenizer.decode(
|
||||
@@ -809,9 +806,10 @@ class ScheduleBatch:
|
||||
|
||||
for i, req in enumerate(self.reqs):
|
||||
if req.grammar is not None:
|
||||
jump_helper = req.grammar.try_jump(req.tokenizer)
|
||||
if jump_helper.can_jump():
|
||||
suffix_ids = jump_helper.suffix_ids
|
||||
jump_helper = req.grammar.try_jump_forward(req.tokenizer)
|
||||
if jump_helper:
|
||||
suffix_ids, _ = jump_helper
|
||||
|
||||
# Current ids, for cache and revert
|
||||
cur_all_ids = tuple(req.origin_input_ids + req.output_ids)[:-1]
|
||||
cur_output_ids = req.output_ids
|
||||
@@ -827,6 +825,8 @@ class ScheduleBatch:
|
||||
next_state,
|
||||
) = req.grammar.jump_forward_str_state(jump_helper)
|
||||
|
||||
# Make the incrementally decoded text part of jump_forward_str
|
||||
# so that the UTF-8 will not corrupt
|
||||
jump_forward_str = new_text + jump_forward_str
|
||||
if not req.jump_forward_and_retokenize(
|
||||
jump_forward_str, next_state
|
||||
|
||||
Reference in New Issue
Block a user