[1/n] apply wna16marlin kernel in moe weight only quantization (#7683)

Co-authored-by: 晟海 <huangtingwei.htw@antgroup.com>
Co-authored-by: yych0745 <1398089567@qq.com>
Co-authored-by: HandH1998 <1335248067@qq.com>
Co-authored-by: 弋云 <yiyun.wyt@antgroup.com>
Co-authored-by: walker-ai <2398833647@qq.com>
This commit is contained in:
AniZpZ
2025-07-02 14:21:25 +08:00
committed by GitHub
parent b3fa5dc3c8
commit 8e03b641ba
27 changed files with 6104 additions and 1 deletion

View File

@@ -29,6 +29,7 @@ from sgl_kernel.elementwise import (
rmsnorm,
silu_and_mul,
)
from sgl_kernel.fused_moe import fused_marlin_moe
from sgl_kernel.gemm import (
awq_dequantize,
bmm_fp8,
@@ -55,6 +56,11 @@ from sgl_kernel.kvcacheio import (
transfer_kv_per_layer,
transfer_kv_per_layer_mla,
)
from sgl_kernel.marlin import (
awq_marlin_moe_repack,
awq_marlin_repack,
gptq_marlin_repack,
)
from sgl_kernel.moe import (
apply_shuffle_mul_sum,
cutlass_fp4_group_mm,