Fix circular imports in gptq.py and unblock test explorer (#4736)
This commit is contained in:
@@ -6,7 +6,6 @@ import torch
|
||||
|
||||
from sglang.srt.layers.linear import LinearBase
|
||||
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
||||
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
|
||||
from sglang.srt.utils import is_cuda
|
||||
|
||||
_is_cuda = is_cuda()
|
||||
@@ -434,6 +433,9 @@ class MarlinConfig(QuantizationConfig):
|
||||
|
||||
from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod
|
||||
|
||||
# Delay import to avoid circular dependency
|
||||
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
|
||||
|
||||
if isinstance(layer, LinearBase) or (
|
||||
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
|
||||
):
|
||||
|
||||
0
python/sglang/test/__init__.py
Normal file
0
python/sglang/test/__init__.py
Normal file
0
python/sglang/test/attention/__init__.py
Normal file
0
python/sglang/test/attention/__init__.py
Normal file
Reference in New Issue
Block a user