Fix typos: capitalize "vLLM" in ImportError messages (#6209)
Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca>
This commit is contained in:
@@ -363,7 +363,7 @@ class CompressedTensorsConfig(QuantizationConfig):
|
||||
if self._is_wNa16_group_channel(weight_quant, input_quant):
|
||||
if not VLLM_AVAILABLE:
|
||||
raise ImportError(
|
||||
"vllm is not installed, to use CompressedTensorsW4A16Sparse24 and CompressedTensorsWNA16, please install vllm"
|
||||
"vLLM is not installed, to use CompressedTensorsW4A16Sparse24 and CompressedTensorsWNA16, please install vLLM"
|
||||
)
|
||||
if (
|
||||
self.quant_format == CompressionFormat.marlin_24.value
|
||||
@@ -409,7 +409,7 @@ class CompressedTensorsConfig(QuantizationConfig):
|
||||
if self._is_fp8_w8a16(weight_quant, input_quant):
|
||||
if not VLLM_AVAILABLE:
|
||||
raise ImportError(
|
||||
"vllm is not installed, to use CompressedTensorsW8A16Fp8, please install vllm"
|
||||
"vLLM is not installed, to use CompressedTensorsW8A16Fp8, please install vLLM"
|
||||
)
|
||||
is_static_input_scheme = input_quant and not input_quant.dynamic
|
||||
return CompressedTensorsW8A16Fp8(
|
||||
@@ -491,7 +491,7 @@ class CompressedTensorsConfig(QuantizationConfig):
|
||||
):
|
||||
if not VLLM_AVAILABLE:
|
||||
raise ImportError(
|
||||
"vllm is not installed, to use CompressedTensors24, please install vllm"
|
||||
"vLLM is not installed, to use CompressedTensors24, please install vLLM"
|
||||
)
|
||||
# Have a valid sparsity scheme
|
||||
# Validate layer is supported by Cutlass 2:4 Kernel
|
||||
|
||||
@@ -65,7 +65,7 @@ class CompressedTensorsMoEMethod:
|
||||
if quant_config._is_wNa16_group_channel(weight_quant, input_quant):
|
||||
if not VLLM_AVAILABLE:
|
||||
raise ImportError(
|
||||
"vllm is not installed, to use CompressedTensorsWNA16MoEMethod, please install vllm."
|
||||
"vLLM is not installed, to use CompressedTensorsWNA16MoEMethod, please install vLLM."
|
||||
)
|
||||
return CompressedTensorsWNA16MoEMethod(quant_config)
|
||||
elif quant_config._is_fp8_w8a8(weight_quant, input_quant):
|
||||
|
||||
@@ -27,10 +27,10 @@ except ImportError:
|
||||
MARLIN_FP8_AVAILABLE = False
|
||||
|
||||
def apply_fp8_marlin_linear(*args, **kwargs):
|
||||
raise ImportError("vllm is not installed")
|
||||
raise ImportError("vLLM is not installed")
|
||||
|
||||
def prepare_fp8_layer_for_marlin(*args, **kwargs):
|
||||
raise ImportError("vllm is not installed")
|
||||
raise ImportError("vLLM is not installed")
|
||||
|
||||
|
||||
__all__ = ["CompressedTensorsW8A16Fp8"]
|
||||
@@ -45,7 +45,7 @@ class CompressedTensorsW8A16Fp8(CompressedTensorsScheme):
|
||||
|
||||
if not MARLIN_FP8_AVAILABLE:
|
||||
raise ImportError(
|
||||
"vllm is not installed. To use CompressedTensorsW8A16Fp8, please install vllm"
|
||||
"vLLM is not installed. To use CompressedTensorsW8A16Fp8, please install vLLM"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
|
||||
Reference in New Issue
Block a user