[Ops] Add layernorm for qwen3Next (#5765)
### What this PR does / why we need it?
Add a `layernormFn` Triton op for the Qwen3Next model to improve performance.
<img width="248" height="526" alt="image" src="https://github.com/user-attachments/assets/27b47157-5df5-4db1-aa88-1dae799b2bf6" />
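For context, here is a minimal plain-PyTorch sketch of gated RMSNorm semantics, under the assumption that `AscendRMSNormGated` follows the common pattern of combining RMS normalization with a SiLU-activated gate (the exact order of normalization vs. gating follows the model definition; one common variant is shown). This is illustrative only, not the Triton kernel added here; the function and argument names are hypothetical.

```python
import torch
import torch.nn.functional as F

def rms_norm_gated_ref(x: torch.Tensor, gate: torch.Tensor,
                       weight: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Plain-PyTorch reference for a gated RMSNorm (illustrative only)."""
    # RMS-normalize over the last (hidden) dimension, in fp32 for stability.
    variance = x.float().pow(2).mean(-1, keepdim=True)
    y = (x.float() * torch.rsqrt(variance + eps)).to(x.dtype) * weight
    # Modulate the normalized output with a SiLU-activated gate.
    return y * F.silu(gate)
```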
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: SunnyLee219 <3294305115@qq.com>
```diff
@@ -669,7 +669,7 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
     from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
     from vllm_ascend.ops.fused_moe.fused_moe import AscendFusedMoE, AscendSharedFusedMoE
-    from vllm_ascend.ops.layernorm import AscendGemmaRMSNorm, AscendRMSNorm
+    from vllm_ascend.ops.layernorm import AscendGemmaRMSNorm, AscendRMSNorm, AscendRMSNormGated
     from vllm_ascend.ops.linear import (
         AscendColumnParallelLinear,
         AscendMergedColumnParallelLinear,
@@ -715,6 +715,7 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
         "MultiHeadLatentAttentionWrapper": AscendMultiHeadLatentAttention,
         "MMEncoderAttention": AscendMMEncoderAttention,
         "ApplyRotaryEmb": AscendApplyRotaryEmb,
+        "RMSNormGated": AscendRMSNormGated,
     }

     # 310P: override selected ops with 310P implementations (keep minimal changes outside _310p)
```
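The diff above extends the custom-op table in `register_ascend_customop` so that vLLM resolves the `RMSNormGated` layer name to the Ascend implementation. A hypothetical sketch of the lookup pattern (only the import and the dict entry come from the diff; `CUSTOM_OPS` and `resolve_custom_op` are illustrative names):

```python
from vllm_ascend.ops.layernorm import AscendRMSNormGated  # import added by this PR

# Hypothetical sketch; the real wiring lives in register_ascend_customop.
CUSTOM_OPS: dict[str, type] = {
    "RMSNormGated": AscendRMSNormGated,  # entry added by this PR
}

def resolve_custom_op(name: str, default_cls: type) -> type:
    """Return the Ascend override for `name`, falling back to the stock vLLM class."""
    return CUSTOM_OPS.get(name, default_cls)
```

With this entry in place, models that build an `RMSNormGated` layer (such as Qwen3Next) should pick up the Ascend op without any model-code changes.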