Fix circular imports in gptq.py and unblock test explorer (#4736)
This commit is contained in:
@@ -6,7 +6,6 @@ import torch
|
|||||||
|
|
||||||
from sglang.srt.layers.linear import LinearBase
|
from sglang.srt.layers.linear import LinearBase
|
||||||
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
||||||
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
|
|
||||||
from sglang.srt.utils import is_cuda
|
from sglang.srt.utils import is_cuda
|
||||||
|
|
||||||
_is_cuda = is_cuda()
|
_is_cuda = is_cuda()
|
||||||
@@ -434,6 +433,9 @@ class MarlinConfig(QuantizationConfig):
|
|||||||
|
|
||||||
from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod
|
from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod
|
||||||
|
|
||||||
|
# Delay import to avoid circular dependency
|
||||||
|
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
|
||||||
|
|
||||||
if isinstance(layer, LinearBase) or (
|
if isinstance(layer, LinearBase) or (
|
||||||
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
|
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
|
||||||
):
|
):
|
||||||
|
|||||||
0
python/sglang/test/__init__.py
Normal file
0
python/sglang/test/__init__.py
Normal file
0
python/sglang/test/attention/__init__.py
Normal file
0
python/sglang/test/attention/__init__.py
Normal file
Reference in New Issue
Block a user