[refactor] slightly tidy fp8 module (#5993)

JieXin Liang
2025-05-08 08:28:24 +08:00
committed by GitHub
parent e444c13fb4
commit b70957fcf8
12 changed files with 238 additions and 231 deletions

@@ -7,9 +7,9 @@ import torch
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
 from sglang.srt.layers.quantization.fp8_kernel import (
-    per_tensor_quant_mla_deep_gemm_masked_fp8,
     per_tensor_quant_mla_fp8,
     per_token_group_quant_fp8,
+    per_token_group_quant_mla_deep_gemm_masked_fp8,
     static_quant_fp8,
     w8a8_block_fp8_matmul,
 )
@@ -236,7 +236,7 @@ class TestPerTokenGroupQuantMlaDeepGemmMaskedFP8(CustomTestCase):
         with torch.inference_mode():
             ref_out, ref_scale = native_per_token_group_quant_fp8(x, group_size, 1e-12)
-            out, scale, _, _, _ = per_tensor_quant_mla_deep_gemm_masked_fp8(
+            out, scale, _, _, _ = per_token_group_quant_mla_deep_gemm_masked_fp8(
                 x, group_size
             )
             out = out[:, :num_tokens, :]
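
The rename reflects what the kernel actually computes: one scale per token group rather than per tensor. The test above checks the renamed kernel against native_per_token_group_quant_fp8; below is a minimal sketch of such a per-token-group FP8 reference, assuming the usual amax-based scaling scheme. The function body, name, and return layout here are illustrative assumptions, not the actual sglang implementation.

# Sketch of a per-token-group FP8 quantization reference (assumed behaviour,
# not the real native_per_token_group_quant_fp8 from sglang).
import torch


def per_token_group_quant_fp8_reference(x: torch.Tensor, group_size: int, eps: float = 1e-12):
    """Quantize x to float8_e4m3fn with one scale per contiguous group of
    `group_size` elements along the last dimension."""
    assert x.shape[-1] % group_size == 0
    fp8_max = torch.finfo(torch.float8_e4m3fn).max
    # Split the last dimension into groups and take one amax per group.
    grouped = x.reshape(*x.shape[:-1], -1, group_size).float()
    amax = grouped.abs().amax(dim=-1, keepdim=True).clamp(min=eps)
    scale = amax / fp8_max
    q = (grouped / scale).clamp(-fp8_max, fp8_max).to(torch.float8_e4m3fn)
    # Return the quantized tensor in the original shape plus the per-group scales.
    return q.reshape(x.shape), scale.squeeze(-1)

For an input of shape (num_tokens, hidden) with group_size 128, this yields an FP8 tensor of the same shape and a (num_tokens, hidden // 128) scale tensor, which is the kind of per-group output the test compares against the fused kernel.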