Add an optional-dependency probe for vllm to compressed_tensors.py.

The hunk inserts a module-level try/except after the
compressed_tensors.utils import block: VLLM_AVAILABLE is set to True when
`import vllm` succeeds and to False when it raises ImportError.

NOTE(review): the imported `vllm` name is not referenced inside this hunk;
confirm against the rest of the file whether the binding itself is needed.

diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
index b0a664460..bf4fa827a 100644
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -40,6 +40,13 @@ from sglang.srt.layers.quantization.compressed_tensors.utils import (
     should_ignore_layer,
 )
 
+try:
+    import vllm
+
+    VLLM_AVAILABLE = True
+except ImportError:
+    VLLM_AVAILABLE = False
+
 logger = logging.getLogger(__name__)
 
 __all__ = ["CompressedTensorsLinearMethod"]