[refactor] slightly tidy fp8 module (#5993)
@@ -7,9 +7,9 @@ import torch
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
 from sglang.srt.layers.quantization.fp8_kernel import (
-    per_tensor_quant_mla_deep_gemm_masked_fp8,
     per_tensor_quant_mla_fp8,
     per_token_group_quant_fp8,
+    per_token_group_quant_mla_deep_gemm_masked_fp8,
     static_quant_fp8,
     w8a8_block_fp8_matmul,
 )
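The import change tracks a rename in fp8_kernel: the masked DeepGEMM MLA quant kernel produces one FP8 scale per token group rather than a single per-tensor scale, so per_token_group_quant_mla_deep_gemm_masked_fp8 is the more accurate name. A minimal sketch of the distinction between the two scaling schemes, assuming plain 2-D activations; the helper names below are illustrative, not sglang API:

```python
import torch

FP8_MAX = torch.finfo(torch.float8_e4m3fn).max

def per_tensor_scale(x: torch.Tensor) -> torch.Tensor:
    # One scalar scale shared by every element of the tensor.
    return x.abs().max().clamp(min=1e-12) / FP8_MAX

def per_token_group_scales(x: torch.Tensor, group_size: int) -> torch.Tensor:
    # One scale per contiguous group of `group_size` channels in each
    # token row: shape [num_tokens, hidden // group_size].
    grouped = x.view(x.shape[0], -1, group_size)
    return grouped.abs().amax(dim=-1).clamp(min=1e-12) / FP8_MAX
```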
@@ -236,7 +236,7 @@ class TestPerTokenGroupQuantMlaDeepGemmMaskedFP8(CustomTestCase):
 
         with torch.inference_mode():
             ref_out, ref_scale = native_per_token_group_quant_fp8(x, group_size, 1e-12)
-            out, scale, _, _, _ = per_tensor_quant_mla_deep_gemm_masked_fp8(
+            out, scale, _, _, _ = per_token_group_quant_mla_deep_gemm_masked_fp8(
                 x, group_size
             )
             out = out[:, :num_tokens, :]
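For context, the baseline the test compares against, native_per_token_group_quant_fp8, can be sketched as below. This is a hedged reconstruction from its call signature (x, group_size, eps) and the per-group scale semantics, not the actual helper in the test file:

```python
import torch

def native_per_token_group_quant_fp8(x, group_size, eps=1e-12):
    # Hypothetical reference: quantize each contiguous group of
    # `group_size` elements along the last dim to float8_e4m3fn,
    # returning the FP8 values and one scale per group.
    finfo = torch.finfo(torch.float8_e4m3fn)
    grouped = x.view(*x.shape[:-1], -1, group_size)
    amax = grouped.abs().amax(dim=-1, keepdim=True).clamp(min=eps)
    scale = amax / finfo.max
    q = (grouped / scale).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)
    return q.view_as(x), scale.squeeze(-1)
```

Note that the kernel under test returns three additional values (discarded as _ in the diff) and appears to pad the token dimension, which is presumably why the test slices out[:, :num_tokens, :] before comparing against the reference.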