diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 2b847de..1f22e56 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -96,7 +96,7 @@ class NPUPlatform(Platform): parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker" cache_config = vllm_config.cache_config if cache_config and cache_config.block_size is None: - cache_config.block_size = 16 + cache_config.block_size = 128 @classmethod def get_attn_backend_cls(cls, selected_backend, head_size, dtype,