Clean up import vllm in quantization/__init__.py (#4834)
This commit is contained in:
@@ -22,11 +22,7 @@ import torch
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
from sglang.srt.hf_transformers_utils import get_config, get_context_length
|
||||
from sglang.srt.layers.quantization import (
|
||||
BASE_QUANTIZATION_METHODS,
|
||||
QUANTIZATION_METHODS,
|
||||
VLLM_AVAILABLE,
|
||||
)
|
||||
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
|
||||
from sglang.srt.utils import get_bool_env_var, is_hip
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -239,12 +235,7 @@ class ModelConfig:
|
||||
|
||||
# adapted from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/config.py
|
||||
def _verify_quantization(self) -> None:
|
||||
# Select supported quantization methods based on vllm availability
|
||||
if VLLM_AVAILABLE:
|
||||
supported_quantization = [*QUANTIZATION_METHODS]
|
||||
else:
|
||||
supported_quantization = [*BASE_QUANTIZATION_METHODS]
|
||||
|
||||
supported_quantization = [*QUANTIZATION_METHODS]
|
||||
rocm_supported_quantization = [
|
||||
"awq",
|
||||
"gptq",
|
||||
@@ -282,11 +273,7 @@ class ModelConfig:
|
||||
quant_method = quant_cfg.get("quant_method", "").lower()
|
||||
|
||||
# Detect which checkpoint it is
|
||||
# Only iterate through currently available quantization methods
|
||||
available_methods = (
|
||||
QUANTIZATION_METHODS if VLLM_AVAILABLE else BASE_QUANTIZATION_METHODS
|
||||
)
|
||||
for _, method in available_methods.items():
|
||||
for _, method in QUANTIZATION_METHODS.items():
|
||||
quantization_override = method.override_quantization_method(
|
||||
quant_cfg, self.quantization
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user