[1/n]: add CUTLASS W4A8 MoE kernel for Hopper architecture (#7772)

Signed-off-by: yangsijia.614 <yangsijia.614@bytedance.com>
Co-authored-by: yicwang <yichen.wang@bytedance.com>
This commit is contained in:
SijiaYang
2025-07-05 11:50:12 +08:00
committed by GitHub
parent cb432f1770
commit da3890e82a
16 changed files with 3602 additions and 0 deletions

View File

@@ -249,6 +249,9 @@ set(SOURCES
"csrc/speculative/speculative_sampling.cu"
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu"
"csrc/kvcacheio/transfer.cu"
"csrc/moe/cutlass_moe/w4a8/scaled_mm_entry.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_moe_data.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cu"
"csrc/common_extension.cc"
"csrc/moe/marlin_moe_wna16/ops.cu"
"csrc/moe/marlin_moe_wna16/gptq_marlin_repack.cu"