From fd63b62eaad903ac0b75630e5b1eee9002783b10 Mon Sep 17 00:00:00 2001
From: Enrique Shockwave <33002121+qeternity@users.noreply.github.com>
Date: Fri, 18 Jul 2025 19:34:14 +0100
Subject: [PATCH] fix compressed tensors WNA16 imports (#8142)

---
 .../quantization/compressed_tensors/compressed_tensors.py    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
index 50d90406d..8afc15a73 100644
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -40,7 +40,10 @@ from sglang.srt.layers.quantization.compressed_tensors.utils import (
 from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
 
 try:
-    import vllm
+    from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import (
+        WNA16_SUPPORTED_BITS,
+        CompressedTensorsWNA16,
+    )
 
     VLLM_AVAILABLE = True
 except ImportError: