【OPS】qwen3-next support triton chunk_gated_delta_rule ops (#4070)

### What this PR does / why we need it?
qwen3-next suppot  triton chunk_gated_delta_rule ops

### co-owners
@OsirisDuan

- vLLM version: v0.11.2

Signed-off-by: shiyuan680 <917935075@qq.com>
This commit is contained in:
shiyuan680
2025-11-28 20:55:43 +08:00
committed by GitHub
parent 5447a039b9
commit 1c4a0468ee
13 changed files with 1625 additions and 149 deletions

View File

@@ -1,10 +1,7 @@
import vllm.model_executor.layers.fla.ops.chunk
import vllm.model_executor.layers.fla.ops.fused_recurrent
import vllm.model_executor.layers.fla.ops.layernorm_guard
import vllm.model_executor.layers.mamba.ops.causal_conv1d
from vllm_ascend.ops.triton.fla.fla import (LayerNormFn,
torch_chunk_gated_delta_rule)
from vllm_ascend.ops.triton.fla.chunk import chunk_gated_delta_rule
from vllm_ascend.ops.triton.fla.layernorm_guard import LayerNormFn
from vllm_ascend.ops.triton.fla.sigmoid_gating import \
fused_recurrent_gated_delta_rule_fwd_kernel
from vllm_ascend.ops.triton.mamba.casual_conv1d import (
@@ -14,4 +11,4 @@ vllm.model_executor.layers.mamba.ops.causal_conv1d.causal_conv1d_update = causal
vllm.model_executor.layers.mamba.ops.causal_conv1d.causal_conv1d_fn = causal_conv1d_fn
vllm.model_executor.layers.fla.ops.fused_recurrent.fused_recurrent_gated_delta_rule_fwd_kernel = fused_recurrent_gated_delta_rule_fwd_kernel
vllm.model_executor.layers.fla.ops.layernorm_guard.LayerNormFn = LayerNormFn
vllm.model_executor.layers.fla.ops.chunk.chunk_gated_delta_rule = torch_chunk_gated_delta_rule
vllm.model_executor.layers.fla.ops.chunk.chunk_gated_delta_rule = chunk_gated_delta_rule