[2/n] decouple quantization implementation from vLLM dependency (#8112)

Co-authored-by: walker-ai <yiyun.wyt@antgroup.com>
Co-authored-by: leoneo <1320612015@qq.com>
This commit is contained in:
Peng Zhang
2025-08-14 18:19:03 +08:00
committed by GitHub
parent 4dbf43601d
commit 5aa1ebd242
32 changed files with 6506 additions and 202 deletions

View File

@@ -23,7 +23,6 @@
#define MARLIN_NAMESPACE_NAME marlin_moe_wna16
#endif
#include "core/registration.h"
#include "kernel.h"
#define STATIC_ASSERT_SCALAR_TYPE_VALID(scalar_t) \
@@ -50,8 +49,7 @@ __global__ void permute_cols_kernel(
int size_m,
int size_k,
int top_k) {};
} // namespace marlin
}
torch::Tensor moe_wna16_marlin_gemm(
torch::Tensor& a,