Support chunked prefill when radix cache is disabled (#811)
This commit is contained in:
@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
"""Base cache class."""
|
||||
"""Base tool cache for constrained decoding tools."""
|
||||
|
||||
import time
|
||||
|
||||
|
||||
class BaseCache:
|
||||
class BaseToolCache:
|
||||
def __init__(self, enable=True):
|
||||
self.enable = enable
|
||||
self.reset()
|
||||
@@ -16,10 +16,10 @@ limitations under the License.
|
||||
"""Cache for the compressed finite state machine."""
|
||||
|
||||
from sglang.srt.constrained import RegexGuide, TransformerTokenizer
|
||||
from sglang.srt.constrained.base_cache import BaseCache
|
||||
from sglang.srt.constrained.base_tool_cache import BaseToolCache
|
||||
|
||||
|
||||
class FSMCache(BaseCache):
|
||||
class FSMCache(BaseToolCache):
|
||||
def __init__(self, tokenizer_path, tokenizer_args_dict, enable=True):
|
||||
super().__init__(enable=enable)
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ from sglang.srt.constrained import (
|
||||
make_byte_level_fsm,
|
||||
make_deterministic_fsm,
|
||||
)
|
||||
from sglang.srt.constrained.base_cache import BaseCache
|
||||
from sglang.srt.constrained.base_tool_cache import BaseToolCache
|
||||
|
||||
IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
|
||||
|
||||
@@ -151,7 +151,7 @@ class JumpForwardMap:
|
||||
)
|
||||
|
||||
|
||||
class JumpForwardCache(BaseCache):
|
||||
class JumpForwardCache(BaseToolCache):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user