[Core] Support the features of prefix cache and chunked prefill in v0/v1 (#782)

### What this PR does / why we need it?
Add support for prefix caching and chunked prefill in both V0 and V1.

---------

Signed-off-by: rjg-lyh <1318825571@qq.com>
This commit is contained in:
rjg-lyh
2025-05-09 16:39:28 +08:00
committed by GitHub
parent 324f819b92
commit fa99f89e93
6 changed files with 156 additions and 32 deletions

View File

@@ -175,11 +175,11 @@ class NPUPlatform(Platform):
if cache_config:
if cache_config.block_size is None:
cache_config.block_size = 128
if envs.VLLM_USE_V1 and cache_config.enable_prefix_caching:
if cache_config.enable_prefix_caching and cache_config.block_size != 128:
logger.warning(
"Prefix caching is not supported for V1 now, disable prefix caching"
"If prefix caching is enabled, block size must be set to 128."
)
cache_config.enable_prefix_caching = False
cache_config.block_size = 128
if envs.VLLM_USE_V1:
# Activate custom ops for v1.