[Refactor] move deep_gemm_wrapper out of quantization (#11784)

This commit is contained in:
Cheng Wan
2025-10-17 18:57:54 -07:00
committed by GitHub
parent 13219e1e48
commit 5b214b50b6
19 changed files with 18 additions and 24 deletions

View File

@@ -6,8 +6,8 @@ import triton
from sgl_kernel import scaled_fp4_grouped_quant, silu_and_mul_scaled_fp4_grouped_quant
from sgl_kernel.elementwise import silu_and_mul
+from sglang.srt.layers import deep_gemm_wrapper
from sglang.srt.layers.moe.ep_moe.kernels import silu_and_mul_masked_post_quant_fwd
-from sglang.srt.layers.quantization import deep_gemm_wrapper
def _test_accuracy_once(E, M, K, input_dtype, device):