reset default block_size from 16 to 128 (#84)
### What this PR does / why we need it?
Changed the default block_size in platform.py from 16 to 128, as Ascend devices have better affinity for a block size of 128.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed.
Signed-off-by: hzji210@gmail.com <hzji210@gmail.com>
This commit is contained in:
@@ -96,7 +96,7 @@ class NPUPlatform(Platform):
         parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker"
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
-            cache_config.block_size = 16
+            cache_config.block_size = 128

     @classmethod
     def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
Reference in New Issue
Block a user