[1/2] Support Qserve (#6457)

Co-authored-by: yych0745 <1398089567@qq.com>
Co-authored-by: sleepcoo <sleepcoo@gmail.com>
This commit is contained in:
HandH1998
2025-05-22 10:48:59 +08:00
committed by GitHub
parent 6ce0ed073b
commit 4d643f6c7a
10 changed files with 2086 additions and 0 deletions

View File

@@ -203,6 +203,8 @@ set(SOURCES
"csrc/gemm/per_tensor_quant_fp8.cu"
"csrc/gemm/per_token_group_quant_8bit.cu"
"csrc/gemm/per_token_quant_fp8.cu"
"csrc/gemm/qserve_w4a8_per_chn_gemm.cu"
"csrc/gemm/qserve_w4a8_per_group_gemm.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"