[7/n] decouple quantization impl from vllm dependency - gguf kernel (#11019)
This commit is contained in:
@@ -271,6 +271,8 @@ set(SOURCES
|
||||
"csrc/elementwise/topk.cu"
|
||||
"csrc/common_extension.cc"
|
||||
|
||||
"csrc/quantization/gguf/gguf_kernel.cu"
|
||||
|
||||
"csrc/gemm/awq_kernel.cu"
|
||||
"csrc/gemm/bmm_fp8.cu"
|
||||
"csrc/gemm/dsv3_fused_a_gemm.cu"
|
||||
@@ -306,6 +308,7 @@ set(SOURCES
|
||||
"csrc/moe/marlin_moe_wna16/ops.cu"
|
||||
"csrc/moe/moe_align_kernel.cu"
|
||||
"csrc/moe/moe_fused_gate.cu"
|
||||
"csrc/moe/moe_sum.cu"
|
||||
"csrc/moe/moe_sum_reduce.cu"
|
||||
"csrc/moe/moe_topk_softmax_kernels.cu"
|
||||
"csrc/moe/nvfp4_blockwise_moe.cu"
|
||||
|
||||
Reference in New Issue
Block a user