[2/n] Decouple quantization implementation from vLLM dependency (#8112)

Co-authored-by: walker-ai <yiyun.wyt@antgroup.com>
Co-authored-by: leoneo <1320612015@qq.com>
This commit is contained in:
Peng Zhang
2025-08-14 18:19:03 +08:00
committed by GitHub
parent 4dbf43601d
commit 5aa1ebd242
32 changed files with 6506 additions and 202 deletions

View File

@@ -7,8 +7,8 @@ def gptq_marlin_repack(
size_k,
size_n,
num_bits,
):
torch.ops.sgl_kernel.gptq_marlin_repack.default(
) -> torch.Tensor:
return torch.ops.sgl_kernel.gptq_marlin_repack(
b_q_weight,
perm,
size_k,