From 403566bcca66cd892804d0b379fb37cb213e5074 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Sun, 3 Aug 2025 08:08:40 +0800 Subject: [PATCH] Remove assertions about per group quant fp8 (#8717) --- python/sglang/srt/layers/quantization/fp8_kernel.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/sglang/srt/layers/quantization/fp8_kernel.py b/python/sglang/srt/layers/quantization/fp8_kernel.py index acde08f82..16d1a4d7f 100644 --- a/python/sglang/srt/layers/quantization/fp8_kernel.py +++ b/python/sglang/srt/layers/quantization/fp8_kernel.py @@ -354,10 +354,6 @@ def sglang_per_token_group_quant_fp8( ), "the last dimension of `x` cannot be divisible by `group_size`" assert x.is_contiguous(), "`x` is not contiguous" - if scale_ue8m0: - # TODO: handle this case by fixing the (token=4, dim=256, group_size=128) UT case - assert x.shape[-1] % (group_size * 4) == 0 - x_q = torch.empty_like(x, device=x.device, dtype=fp8_dtype) x_s = create_per_token_group_quant_fp8_output_scale( x_shape=x.shape,