[7/n] decouple quantization impl from vllm dependency - gguf kernel (#11019)

This commit is contained in:
PGFLMG
2025-10-12 05:04:57 +08:00
committed by GitHub
parent b5dcfd4154
commit 8fdcd98efe
19 changed files with 7936 additions and 1 deletions

View File

@@ -48,6 +48,16 @@ def moe_sum_reduce(
)
def moe_sum(
    input_tensor: torch.Tensor,
    output_tensor: torch.Tensor,
):
    """Dispatch to the ``sgl_kernel`` ``moe_sum`` custom op.

    Both tensors are forwarded positionally, unchanged, to
    ``torch.ops.sgl_kernel.moe_sum.default``. This wrapper returns nothing;
    whatever the op itself returns is discarded.

    Args:
        input_tensor: First argument handed to the op.
        output_tensor: Second argument handed to the op.
            NOTE(review): presumably the op writes its result into this
            tensor in place -- confirm against the kernel implementation.
    """
    # Resolve the op overload once, then invoke it with the caller's tensors.
    moe_sum_op = torch.ops.sgl_kernel.moe_sum.default
    moe_sum_op(input_tensor, output_tensor)
def moe_fused_gate(
input_tensor,
bias,