Support chunked prefill when radix cache is disabled (#811)

This commit is contained in:
Liangsheng Yin
2024-08-01 00:29:01 -07:00
committed by GitHub
parent ca600e8cd6
commit c020f9ceda
9 changed files with 163 additions and 26 deletions

View File

@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
"""Base cache class."""
"""Base tool cache for constrained decoding tools."""
import time
class BaseCache:
class BaseToolCache:
def __init__(self, enable=True):
self.enable = enable
self.reset()

View File

@@ -16,10 +16,10 @@ limitations under the License.
"""Cache for the compressed finite state machine."""
from sglang.srt.constrained import RegexGuide, TransformerTokenizer
from sglang.srt.constrained.base_cache import BaseCache
from sglang.srt.constrained.base_tool_cache import BaseToolCache
class FSMCache(BaseCache):
class FSMCache(BaseToolCache):
def __init__(self, tokenizer_path, tokenizer_args_dict, enable=True):
super().__init__(enable=enable)

View File

@@ -30,7 +30,7 @@ from sglang.srt.constrained import (
make_byte_level_fsm,
make_deterministic_fsm,
)
from sglang.srt.constrained.base_cache import BaseCache
from sglang.srt.constrained.base_tool_cache import BaseToolCache
IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
@@ -151,7 +151,7 @@ class JumpForwardMap:
)
class JumpForwardCache(BaseCache):
class JumpForwardCache(BaseToolCache):
def __init__(self):
super().__init__()