[Enhancement] Add padding for ACL Graph (#803)

### What this PR does / why we need it?
Add padding for ACL Graph and move the graph batch size adjustment logic
into utils.py.

---------

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
yiz-liu
2025-05-12 20:26:22 +08:00
committed by GitHub
parent efabd722eb
commit 701b0fd95e
4 changed files with 97 additions and 79 deletions

View File

@@ -25,6 +25,8 @@ from vllm.logger import logger
from vllm.platforms import Platform, PlatformEnum
from vllm.utils import supports_dynamo
from vllm_ascend.utils import update_aclgraph_sizes
CUSTOM_OP_ENABLED = False
try:
# register custom ops into torch_library here
@@ -144,6 +146,7 @@ class NPUPlatform(Platform):
compilation_config.use_inductor = False
compilation_config.splitting_ops.extend(
["vllm.unified_ascend_attention_with_output"])
update_aclgraph_sizes(vllm_config)
if vllm_config.additional_config is not None:
enable_graph_mode = vllm_config.additional_config.get(