Fix circular imports in gptq.py and unblock test explorer (#4736)

2025-03-24 18:07:08 -07:00
parent 77cf771ebe
commit 4c584fc632
3 changed files with 3 additions and 1 deletion
--- a/python/sglang/srt/layers/quantization/gptq.py
+++ b/python/sglang/srt/layers/quantization/gptq.py
@@ -6,7 +6,6 @@ import torch

 from sglang.srt.layers.linear import LinearBase
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
-from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
 from sglang.srt.utils import is_cuda

 _is_cuda = is_cuda()
@@ -434,6 +433,9 @@ class MarlinConfig(QuantizationConfig):

        from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod

+        # Delay import to avoid circular dependency
+        from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
+
        if isinstance(layer, LinearBase) or (
            isinstance(layer, ParallelLMHead) and self.lm_head_quantized
        ):
--- a/python/sglang/test/__init__.py
+++ b/python/sglang/test/__init__.py
--- a/python/sglang/test/attention/__init__.py
+++ b/python/sglang/test/attention/__init__.py