diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py index 50d90406d..8afc15a73 100644 --- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py @@ -40,7 +40,10 @@ from sglang.srt.layers.quantization.compressed_tensors.utils import ( from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod try: - import vllm + from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import ( + WNA16_SUPPORTED_BITS, + CompressedTensorsWNA16, + ) VLLM_AVAILABLE = True except ImportError: