[Model] Add qwen3Next support in Main (#4596)

### What this PR does / why we need it? Add Qwen3Next support in main ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2 --------- Signed-off-by: SunnyLee219 <3294305115@qq.com>
2025-12-03 14:17:37 +08:00
parent 3f81c4bb25
commit 38bd95229f
4 changed files with 7 additions and 3 deletions
--- a/vllm_ascend/patch/worker/patch_triton.py
+++ b/vllm_ascend/patch/worker/patch_triton.py
@@ -11,4 +11,4 @@ vllm.model_executor.layers.mamba.ops.causal_conv1d.causal_conv1d_update = causal
 vllm.model_executor.layers.mamba.ops.causal_conv1d.causal_conv1d_fn = causal_conv1d_fn
 vllm.model_executor.layers.fla.ops.fused_recurrent.fused_recurrent_gated_delta_rule_fwd_kernel = fused_recurrent_gated_delta_rule_fwd_kernel
 vllm.model_executor.layers.fla.ops.layernorm_guard.LayerNormFn = LayerNormFn
-vllm.model_executor.layers.fla.ops.chunk.chunk_gated_delta_rule = chunk_gated_delta_rule
+vllm.model_executor.layers.fla.ops.chunk_gated_delta_rule = chunk_gated_delta_rule