[1/N][CustomOp] Register activation customop instead of overwrite forward_oot (#1841)

### What this PR does / why we need it?
We'll refactor `CustomOp` in vllm-ascend starting from this PR.

Use the `CustomOp.register_oot` function to register the custom op, taking
`AscendQuickGELU` as an example:
```python
from vllm_ascend.ops.activation import AscendQuickGELU
CustomOp.register_oot(_decorated_op_cls=AscendQuickGELU, name="QuickGELU")
```

This is a quick adaptation to the `CustomOp.register_oot` mechanism from vLLM
0.9.2. As a further step, we can drop the inheritance from `QuickGELU` and
write our own `QuickGELU` entirely, as sketched below.
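
For illustration, such a standalone version could subclass `CustomOp` directly and carry its own reference implementation. A minimal sketch, not part of this PR (the `forward_native` body mirrors upstream `QuickGELU`'s `x * sigmoid(1.702 * x)`):
```python
import torch
from vllm.model_executor.custom_op import CustomOp


class AscendQuickGELU(CustomOp):
    """QuickGELU written for Ascend without inheriting vLLM's QuickGELU."""

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        # Reference implementation, same math as upstream QuickGELU.
        return x * torch.sigmoid(1.702 * x)

    def forward_oot(self, x: torch.Tensor) -> torch.Tensor:
        # Ascend NPU fused kernel.
        import torch_npu
        return torch_npu.npu_fast_gelu(x)


CustomOp.register_oot(_decorated_op_cls=AscendQuickGELU, name="QuickGELU")
```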

Part of https://github.com/vllm-project/vllm-ascend/pull/1647



- vLLM version: v0.9.2
- vLLM main:
8dfb45ca33

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
Mengqing Cao
2025-07-18 23:07:14 +08:00
committed by GitHub
parent 8a91e6e59c
commit 574fe407eb
8 changed files with 154 additions and 22 deletions

vllm_ascend/ops/activation.py

@@ -18,25 +18,25 @@
 import torch
 from vllm.model_executor.layers.activation import QuickGELU, SiluAndMul
 
-
-def silu_and_mul_forward_oot(self, x: torch.Tensor) -> torch.Tensor:
-    import torch_npu
-
-    from vllm_ascend.utils import is_310p
-
-    if is_310p():
-        out = torch_npu.npu_swiglu(x.to(torch.float32)).to(torch.float16)
-    else:
-        out = torch_npu.npu_swiglu(x)
-    return out
-
-
-def quick_gelu_forward_oot(self, x: torch.tensor) -> torch.Tensor:
-    import torch_npu
-
-    out = torch_npu.npu_fast_gelu(x)
-    return out
-
-
-QuickGELU.forward_oot = quick_gelu_forward_oot
-SiluAndMul.forward_oot = silu_and_mul_forward_oot
+from vllm_ascend.utils import is_310p
+
+
+class AscendQuickGELU(QuickGELU):
+
+    def forward_oot(self, x: torch.tensor) -> torch.Tensor:
+        import torch_npu
+
+        out = torch_npu.npu_fast_gelu(x)
+        return out
+
+
+class AscendSiluAndMul(SiluAndMul):
+
+    def forward_oot(self, x: torch.Tensor) -> torch.Tensor:
+        import torch_npu
+
+        if is_310p():
+            out = torch_npu.npu_swiglu(x.to(torch.float32)).to(torch.float16)
+        else:
+            out = torch_npu.npu_swiglu(x)
+        return out

vllm_ascend/platform.py

@@ -29,7 +29,7 @@ from vllm.platforms import Platform, PlatformEnum
 from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
                                        init_ascend_config)
 from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, is_310p,
-                               update_aclgraph_sizes)
+                               register_ascend_customop, update_aclgraph_sizes)
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
@@ -205,6 +205,9 @@ class NPUPlatform(Platform):
                 ascend_config.ascend_scheduler_config)
             vllm_config.scheduler_config = ascend_scheduler_config
 
+        # register Ascend CustomOp
+        register_ascend_customop()
+
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                              kv_cache_dtype, block_size, use_v1, use_mla):

vllm_ascend/utils.py

@@ -561,3 +561,26 @@ def delete_torchair_cache_file():
     torch_air_abs_path = get_torchair_current_work_dir()
     if os.path.exists(torch_air_abs_path):
         shutil.rmtree(torch_air_abs_path)
+
+
+_ASCEND_CUSTOMOP_IS_REGISTERED = False
+
+
+def register_ascend_customop():
+    """Register Ascend CustomOp.
+
+    NOTE: if the register branch requires the model type, please use `vllm.config.get_current_vllm_config`,
+    and ensure this executes after the model config is initialized.
+    """
+    global _ASCEND_CUSTOMOP_IS_REGISTERED
+    if _ASCEND_CUSTOMOP_IS_REGISTERED:
+        return
+    from vllm.model_executor.custom_op import CustomOp
+
+    from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
+
+    CustomOp.register_oot(_decorated_op_cls=AscendQuickGELU, name="QuickGELU")
+    CustomOp.register_oot(_decorated_op_cls=AscendSiluAndMul,
+                          name="SiluAndMul")
+    # NOTE: Keep this at last to ensure all custom actions are registered
+    _ASCEND_CUSTOMOP_IS_REGISTERED = True
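
For reference, the module-level guard makes repeated registration cheap; a minimal usage sketch (illustrative only, not shipped with this PR):
```python
from vllm_ascend.utils import register_ascend_customop

# First call registers the Ascend custom ops via CustomOp.register_oot.
register_ascend_customop()
# Subsequent calls (e.g. from repeated platform config checks) return early.
register_ascend_customop()
```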