Remove references to squeezellm (#1603)
This commit is contained in:
@@ -19,7 +19,6 @@ from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
|
|||||||
from vllm.model_executor.layers.quantization.gptq_marlin_24 import GPTQMarlin24Config
|
from vllm.model_executor.layers.quantization.gptq_marlin_24 import GPTQMarlin24Config
|
||||||
from vllm.model_executor.layers.quantization.marlin import MarlinConfig
|
from vllm.model_executor.layers.quantization.marlin import MarlinConfig
|
||||||
from vllm.model_executor.layers.quantization.qqq import QQQConfig
|
from vllm.model_executor.layers.quantization.qqq import QQQConfig
|
||||||
from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig
|
|
||||||
from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig
|
from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig
|
||||||
|
|
||||||
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
||||||
@@ -39,7 +38,6 @@ QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
|
|||||||
"gptq_marlin": GPTQMarlinConfig,
|
"gptq_marlin": GPTQMarlinConfig,
|
||||||
"awq_marlin": AWQMarlinConfig,
|
"awq_marlin": AWQMarlinConfig,
|
||||||
"gptq": GPTQConfig,
|
"gptq": GPTQConfig,
|
||||||
"squeezellm": SqueezeLLMConfig,
|
|
||||||
"compressed-tensors": CompressedTensorsConfig,
|
"compressed-tensors": CompressedTensorsConfig,
|
||||||
"bitsandbytes": BitsAndBytesConfig,
|
"bitsandbytes": BitsAndBytesConfig,
|
||||||
"qqq": QQQConfig,
|
"qqq": QQQConfig,
|
||||||
|
|||||||
@@ -266,7 +266,6 @@ class ServerArgs:
|
|||||||
"marlin",
|
"marlin",
|
||||||
"gptq_marlin",
|
"gptq_marlin",
|
||||||
"awq_marlin",
|
"awq_marlin",
|
||||||
"squeezellm",
|
|
||||||
"bitsandbytes",
|
"bitsandbytes",
|
||||||
],
|
],
|
||||||
help="The quantization method.",
|
help="The quantization method.",
|
||||||
|
|||||||
Reference in New Issue
Block a user