[CustomOp] Register RotaryEmbedding instead of overwrite forward (#2385)

### What this PR does / why we need it?
Register RotaryEmbedding instead of overwrite forward

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with newly added and existing tests.

- vLLM version: v0.10.0
- vLLM main:
808d2e9aa0

---------

Signed-off-by: Icey <1790571317@qq.com>
Signed-off-by: wxsIcey <1790571317@qq.com>
This commit is contained in:
Icey
2025-08-25 09:32:35 +08:00
committed by GitHub
parent 950c4b219a
commit f796e6280b
6 changed files with 426 additions and 381 deletions

View File

@@ -17,12 +17,13 @@
import torch
import vllm_ascend.ops.activation # noqa
import vllm_ascend.ops.common_fused_moe # noqa
import vllm_ascend.ops.fused_moe # noqa
import vllm_ascend.ops.layernorm # noqa
import vllm_ascend.ops.rotary_embedding # noqa
import vllm_ascend.ops.vocab_parallel_embedding # noqa
from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
from vllm_ascend.ops.rotary_embedding import (
AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding)
class dummyFusionOp:
@@ -47,3 +48,9 @@ def register_dummy_fusion_op() -> None:
name="fused_add_rms_norm_static_fp8_quant")
torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp(
name="rms_norm_dynamic_per_token_quant")
# Public API of this module: the Ascend-specific custom-op classes that are
# registered in place of the stock vLLM implementations.
__all__ = [
    "AscendQuickGELU",
    "AscendSiluAndMul",
    "AscendRotaryEmbedding",
    "AscendDeepseekScalingRotaryEmbedding",
]