Support chunked prefill when radix cache is disabled (#811)

This commit is contained in:
Liangsheng Yin
2024-08-01 00:29:01 -07:00
committed by GitHub
parent ca600e8cd6
commit c020f9ceda
9 changed files with 163 additions and 26 deletions

View File

@@ -419,10 +419,6 @@ class ServerArgs:
self.dp_size > 1 and self.node_rank is not None
), "multi-node data parallel is not supported"
assert not (
self.chunked_prefill_size is not None and self.disable_radix_cache
), "chunked prefill is not supported with radix cache disabled currently"
@dataclasses.dataclass
class PortArgs: