[Triton][Config] Add muls_add triton kernel and refactor AscendCompilationConfig (#5518)

### What this PR does / why we need it?
Add muls_add triton kernel with related fusion pass. What's more, this
PR refactors `AscendCompilationConfig` and deletes `NpugraphExConfig`.

### Does this PR introduce _any_ user-facing change?
None

### How was this patch tested?
CI passed with a newly added test.


- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1

---------

Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
whx
2026-03-02 17:54:25 +08:00
committed by GitHub
parent 8547520726
commit 16c879cdf7
14 changed files with 290 additions and 98 deletions

View File

@@ -15,6 +15,7 @@ from vllm.utils.torch_utils import direct_register_custom_op
from vllm_ascend.ascend_forward_context import MoECommType
from vllm_ascend.ops.rotary_embedding import rope_forward_oot
from vllm_ascend.ops.triton.muls_add import muls_add_triton
from vllm_ascend.ops.weight_prefetch import maybe_npu_prefetch
from vllm_ascend.utils import npu_stream_switch, prefetch_stream
@@ -201,6 +202,14 @@ def _rope_forward_oot_impl_fake(
return query, key
def _muls_add_impl_fake(
x: torch.Tensor,
y: torch.Tensor,
scale: float,
) -> torch.Tensor:
return torch.empty_like(x)
direct_register_custom_op(
op_name="maybe_chunk_residual",
op_func=_maybe_chunk_residual_impl,
@@ -272,3 +281,11 @@ direct_register_custom_op(
mutates_args=[],
dispatch_key="PrivateUse1",
)
# Register "muls_add" as a custom op: the real kernel is the Triton
# implementation imported above, the fake impl only mirrors the input's
# shape/dtype (it returns torch.empty_like(x)), no arguments are mutated,
# and it is registered under the "PrivateUse1" (NPU) dispatch key.
direct_register_custom_op(
    op_name="muls_add",
    op_func=muls_add_triton,
    fake_impl=_muls_add_impl_fake,
    mutates_args=[],
    dispatch_key="PrivateUse1",
)