[1/n]: add cutlass W4A8 moe kernel for hopper architecture (#7772)
Signed-off-by: yangsijia.614 <yangsijia.614@bytedance.com> Co-authored-by: yicwang <yichen.wang@bytedance.com>
This commit is contained in:
@@ -249,6 +249,9 @@ set(SOURCES
|
||||
"csrc/speculative/speculative_sampling.cu"
|
||||
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu"
|
||||
"csrc/kvcacheio/transfer.cu"
|
||||
"csrc/moe/cutlass_moe/w4a8/scaled_mm_entry.cu"
|
||||
"csrc/moe/cutlass_moe/w4a8/w4a8_moe_data.cu"
|
||||
"csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cu"
|
||||
"csrc/common_extension.cc"
|
||||
"csrc/moe/marlin_moe_wna16/ops.cu"
|
||||
"csrc/moe/marlin_moe_wna16/gptq_marlin_repack.cu"
|
||||
|
||||
Reference in New Issue
Block a user