[Core] Support the features of prefix cache and chunked prefill in v0/v1 (#782)
### What this PR does / why we need it?

Support the features of prefix cache and chunked prefill in v0/v1.

---------

Signed-off-by: rjg-lyh <1318825571@qq.com>
This commit is contained in:
@@ -175,11 +175,11 @@ class NPUPlatform(Platform):
|
||||
if cache_config:
|
||||
if cache_config.block_size is None:
|
||||
cache_config.block_size = 128
|
||||
if envs.VLLM_USE_V1 and cache_config.enable_prefix_caching:
|
||||
if cache_config.enable_prefix_caching and cache_config.block_size != 128:
|
||||
logger.warning(
|
||||
"Prefix caching is not supported for V1 now, disable prefix caching"
|
||||
"If prefix caching is enabled, block size must be set to 128."
|
||||
)
|
||||
cache_config.enable_prefix_caching = False
|
||||
cache_config.block_size = 128
|
||||
|
||||
if envs.VLLM_USE_V1:
|
||||
# Activate custom ops for v1.
|
||||
|
||||
Reference in New Issue
Block a user