From fd63b62eaad903ac0b75630e5b1eee9002783b10 Mon Sep 17 00:00:00 2001 From: Enrique Shockwave <33002121+qeternity@users.noreply.github.com> Date: Fri, 18 Jul 2025 19:34:14 +0100 Subject: [PATCH] fix compressed tensors WNA16 imports (#8142) --- .../quantization/compressed_tensors/compressed_tensors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py index 50d90406d..8afc15a73 100644 --- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py @@ -40,7 +40,10 @@ from sglang.srt.layers.quantization.compressed_tensors.utils import ( from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod try: - import vllm + from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import ( + WNA16_SUPPORTED_BITS, + CompressedTensorsWNA16, + ) VLLM_AVAILABLE = True except ImportError: