[Enhancement] Add padding for ACL Graph (#803)
### What this PR does / why we need it?

Add padding for ACL Graph, and refactor the graph batch size adjustments into utils.py.

---------

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
@@ -25,6 +25,8 @@ from vllm.logger import logger
|
||||
from vllm.platforms import Platform, PlatformEnum
|
||||
from vllm.utils import supports_dynamo
|
||||
|
||||
from vllm_ascend.utils import update_aclgraph_sizes
|
||||
|
||||
CUSTOM_OP_ENABLED = False
|
||||
try:
|
||||
# register custom ops into torch_library here
|
||||
@@ -144,6 +146,7 @@ class NPUPlatform(Platform):
|
||||
compilation_config.use_inductor = False
|
||||
compilation_config.splitting_ops.extend(
|
||||
["vllm.unified_ascend_attention_with_output"])
|
||||
update_aclgraph_sizes(vllm_config)
|
||||
|
||||
if vllm_config.additional_config is not None:
|
||||
enable_graph_mode = vllm_config.additional_config.get(
|
||||
|
||||
Reference in New Issue
Block a user