Fuse writing KV buffer into rope kernel (part 1: sgl-kernel) (#9077)

This commit is contained in:
fzyzcjy
2025-08-12 16:46:40 +08:00
committed by GitHub
parent fcc11e5ed5
commit 9aea255522
11 changed files with 1152 additions and 194 deletions

View File

@@ -21,6 +21,7 @@ from sgl_kernel.attention import (
)
from sgl_kernel.cutlass_moe import cutlass_w4a8_moe_mm, get_cutlass_w4a8_moe_mm_data
from sgl_kernel.elementwise import (
FusedSetKVBufferArg,
apply_rope_with_cos_sin_cache_inplace,
fused_add_rmsnorm,
gelu_and_mul,