[2/n] decouple quantization implementation from vLLM dependency (#8112)
Co-authored-by: walker-ai <yiyun.wyt@antgroup.com>
Co-authored-by: leoneo <1320612015@qq.com>
This commit is contained in:
@@ -23,7 +23,6 @@
|
||||
#define MARLIN_NAMESPACE_NAME marlin_moe_wna16
|
||||
#endif
|
||||
|
||||
#include "core/registration.h"
|
||||
#include "kernel.h"
|
||||
|
||||
#define STATIC_ASSERT_SCALAR_TYPE_VALID(scalar_t) \
|
||||
@@ -50,8 +49,7 @@ __global__ void permute_cols_kernel(
|
||||
int size_m,
|
||||
int size_k,
|
||||
int top_k) {};
|
||||
|
||||
} // namespace marlin
|
||||
}
|
||||
|
||||
torch::Tensor moe_wna16_marlin_gemm(
|
||||
torch::Tensor& a,
|
||||
|
||||
Reference in New Issue
Block a user