[Refactor] cleanup converting_weight_acl_format_format (#2482)

move maybe_converting_weight_acl_format_format to torchair module, it's only used with 310p+torchair - vLLM version: v0.10.1.1 - vLLM main: 49ab23b3cc Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-08-25 19:48:55 +08:00
parent 0f81e032f0
commit de7649492d
6 changed files with 75 additions and 108 deletions
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -32,7 +32,6 @@ from torch_npu.npu.streams import Event
 from vllm.logger import logger

 import vllm_ascend.envs as envs_ascend
-from vllm_ascend.ascend_config import get_ascend_config

 if TYPE_CHECKING:
    from vllm.config import VllmConfig
@@ -170,28 +169,6 @@ def aligned_16(tensor: torch.Tensor):
    return new_tensor


-def maybe_converting_weight_acl_format(model, format=ACL_FORMAT_FRACTAL_NZ):
-    # currently, there are some operations which do not support ACL_FORMAT_FRACTAL_NZ
-    # in eager mode but support it in torchair graph mode. since ACL_FORMAT_FRACTAL_NZ
-    # is much more preferred than ACL_FORMAT_FRACTAL_ND on 300I Duo, we add this
-    # conversion when using torchair graph mode on 300I Duo platform.
-    # TODO: we will remove this conversion if npu_quant_grouped_matmul_dequant
-    # accepts weight format of ACL_FORMAT_FRACTAL_NZ in eager mode.
-    from vllm.model_executor.layers.fused_moe.layer import FusedMoE
-
-    use_torchair = get_ascend_config().torchair_graph_config.enabled
-    if not is_310p() or not use_torchair:
-        return
-    for module in model.modules():
-        if isinstance(module, FusedMoE):
-            if torch_npu.get_npu_format(module.w13_weight.data) == format:
-                return
-            module.w13_weight.data = torch_npu.npu_format_cast(
-                module.w13_weight.data, format)
-            module.w2_weight.data = torch_npu.npu_format_cast(
-                module.w2_weight.data, format)
-
-
 def try_register_lib(lib_name: str, lib_info: str = ""):
    import importlib
    import importlib.util