[2/n] decouple quantization implementation from vLLM dependency (#8112)

Co-authored-by: walker-ai <yiyun.wyt@antgroup.com> Co-authored-by: leoneo <1320612015@qq.com>
2025-08-14 18:19:03 +08:00
parent 4dbf43601d
commit 5aa1ebd242
32 changed files with 6506 additions and 202 deletions
--- a/sgl-kernel/csrc/moe/marlin_moe_wna16/kernel.h
+++ b/sgl-kernel/csrc/moe/marlin_moe_wna16/kernel.h
@@ -3,8 +3,8 @@
 #define MARLIN_NAMESPACE_NAME marlin_moe_wna16
 #endif

-#include "gptq_marlin/marlin.cuh"
-#include "gptq_marlin/marlin_dtypes.cuh"
+#include "gemm/marlin/marlin.cuh"
+#include "gemm/marlin/marlin_dtypes.cuh"
 #include "scalar_type.hpp"

 #define MARLIN_KERNEL_PARAMS                                                                                         \