Add patch_qwen3_5 for triton ops fused_recurrent_gated_delta_rule (#7109)

### What this PR does / why we need it? The op `torch_npu.npu_recurrent_gated_delta_rule` does not currently support `ssm_state` inputs in float32 format, so we temporarily retain the Triton-based `_forward_core` implementation for Qwen3_5. --------- Signed-off-by: pppeng <zepengliu912@qq.com> Signed-off-by: pppeng <60355449+ppppeng@users.noreply.github.com>
2026-03-10 23:28:58 +08:00
parent a78a00e0b1
commit 0f289fa2a8
4 changed files with 275 additions and 0 deletions
--- a/vllm_ascend/patch/worker/__init__.py
+++ b/vllm_ascend/patch/worker/__init__.py
@@ -17,9 +17,14 @@

 from vllm.triton_utils import HAS_TRITON

+from vllm_ascend.utils import vllm_version_is
+
 if HAS_TRITON:
    import vllm_ascend.patch.worker.patch_triton

+if not vllm_version_is("v0.16.0"):
+    import vllm_ascend.patch.worker.patch_qwen3_5  # noqa
+
 # isort: off
 import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 import vllm_ascend.patch.worker.patch_unquantized_gemm  # noqa