[7/n] decouple quantization impl from vllm dependency - gguf kernel (#11019)

This commit is contained in:
PGFLMG
2025-10-12 05:04:57 +08:00
committed by GitHub
parent b5dcfd4154
commit 8fdcd98efe
19 changed files with 7936 additions and 1 deletion

View File

@@ -271,6 +271,8 @@ set(SOURCES
"csrc/elementwise/topk.cu"
"csrc/common_extension.cc"
"csrc/quantization/gguf/gguf_kernel.cu"
"csrc/gemm/awq_kernel.cu"
"csrc/gemm/bmm_fp8.cu"
"csrc/gemm/dsv3_fused_a_gemm.cu"
@@ -306,6 +308,7 @@ set(SOURCES
"csrc/moe/marlin_moe_wna16/ops.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_sum.cu"
"csrc/moe/moe_sum_reduce.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"
"csrc/moe/nvfp4_blockwise_moe.cu"