fix: correct a typo in the comments of w8a8_fp8.py (#4843)
This commit is contained in:
@@ -37,7 +37,7 @@ class W8A8Fp8Config(QuantizationConfig):
|
|||||||
Note:
|
Note:
|
||||||
- For models without offline quantization, weights will be quantized during model loading
|
- For models without offline quantization, weights will be quantized during model loading
|
||||||
- If CUTLASS is supported: Per-channel weight quantization is used
|
- If CUTLASS is supported: Per-channel weight quantization is used
|
||||||
- If CUTLASS is not supported: Falls back to per-token weight quantization
|
- If CUTLASS is not supported: Falls back to per-tensor weight quantization
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, is_checkpoint_fp8_serialized: bool = False):
|
def __init__(self, is_checkpoint_fp8_serialized: bool = False):
|
||||||
|
|||||||
Reference in New Issue
Block a user