[Feat] Qwen-1M context support [1/2]: Update block sparse attention backend utils kernel (#5847)

Co-authored-by: sighingnow <sighingnow@gmail.com>
This commit is contained in:
PGFLMG
2025-04-29 02:03:17 +08:00
committed by GitHub
parent d364b9b0f2
commit ee71ed8a41
6 changed files with 763 additions and 1 deletions

View File

@@ -176,6 +176,7 @@ set(SOURCES
"csrc/attention/cascade.cu"
"csrc/attention/merge_attn_states.cu"
"csrc/attention/cutlass_mla_kernel.cu"
"csrc/attention/vertical_slash_index.cu"
"csrc/attention/lightning_attention_decode_kernel.cu"
"csrc/elementwise/activation.cu"
"csrc/elementwise/fused_add_rms_norm_kernel.cu"