From 72031173e49350582641179042435f90b9cd4019 Mon Sep 17 00:00:00 2001
From: Jiaqi <57028284+ZhuJiaqi9905@users.noreply.github.com>
Date: Fri, 28 Mar 2025 12:06:47 +0800
Subject: [PATCH] fix: fix typo of comments in w8a8_fp8.py (#4843)

---
 python/sglang/srt/layers/quantization/w8a8_fp8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/w8a8_fp8.py b/python/sglang/srt/layers/quantization/w8a8_fp8.py
index 533813c6f..77819d4fb 100644
--- a/python/sglang/srt/layers/quantization/w8a8_fp8.py
+++ b/python/sglang/srt/layers/quantization/w8a8_fp8.py
@@ -37,7 +37,7 @@ class W8A8Fp8Config(QuantizationConfig):
     Note:
     - For models without offline quantization, weights will be quantized during model loading
         - If CUTLASS is supported: Per-channel weight quantization is used
-        - If CUTLASS is not supported: Falls back to per-token weight quantization
+        - If CUTLASS is not supported: Falls back to per-tensor weight quantization
     """
 
     def __init__(self, is_checkpoint_fp8_serialized: bool = False):