[7/n] decouple quantization impl from vllm dependency - gguf kernel (#11019)

2025-10-12 05:04:57 +08:00
parent b5dcfd4154
commit 8fdcd98efe
19 changed files with 7936 additions and 1 deletion
--- a/sgl-kernel/CMakeLists.txt
+++ b/sgl-kernel/CMakeLists.txt
@@ -271,6 +271,8 @@ set(SOURCES
    "csrc/elementwise/topk.cu"
    "csrc/common_extension.cc"

+    "csrc/quantization/gguf/gguf_kernel.cu"
+
    "csrc/gemm/awq_kernel.cu"
    "csrc/gemm/bmm_fp8.cu"
    "csrc/gemm/dsv3_fused_a_gemm.cu"
@@ -306,6 +308,7 @@ set(SOURCES
    "csrc/moe/marlin_moe_wna16/ops.cu"
    "csrc/moe/moe_align_kernel.cu"
    "csrc/moe/moe_fused_gate.cu"
+    "csrc/moe/moe_sum.cu"
    "csrc/moe/moe_sum_reduce.cu"
    "csrc/moe/moe_topk_softmax_kernels.cu"
    "csrc/moe/nvfp4_blockwise_moe.cu"