Clean up import vllm in quantization/__init__.py (#4834)

This commit is contained in:
Lianmin Zheng
2025-03-28 10:34:10 -07:00
committed by GitHub
parent ef9a378a20
commit 74e0ac1dbd
14 changed files with 191 additions and 254 deletions

View File

@@ -22,11 +22,7 @@ import torch
from transformers import PretrainedConfig
from sglang.srt.hf_transformers_utils import get_config, get_context_length
from sglang.srt.layers.quantization import (
BASE_QUANTIZATION_METHODS,
QUANTIZATION_METHODS,
VLLM_AVAILABLE,
)
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.utils import get_bool_env_var, is_hip
logger = logging.getLogger(__name__)
@@ -239,12 +235,7 @@ class ModelConfig:
# adapted from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/config.py
def _verify_quantization(self) -> None:
# Select supported quantization methods based on vllm availability
if VLLM_AVAILABLE:
supported_quantization = [*QUANTIZATION_METHODS]
else:
supported_quantization = [*BASE_QUANTIZATION_METHODS]
supported_quantization = [*QUANTIZATION_METHODS]
rocm_supported_quantization = [
"awq",
"gptq",
@@ -282,11 +273,7 @@ class ModelConfig:
quant_method = quant_cfg.get("quant_method", "").lower()
# Detect which checkpoint is it
# Only iterate through currently available quantization methods
available_methods = (
QUANTIZATION_METHODS if VLLM_AVAILABLE else BASE_QUANTIZATION_METHODS
)
for _, method in available_methods.items():
for _, method in QUANTIZATION_METHODS.items():
quantization_override = method.override_quantization_method(
quant_cfg, self.quantization
)