Update modelopt config and fix running issue (#2792)
This commit is contained in:
@@ -17,12 +17,12 @@ from vllm.model_executor.layers.quantization.gptq import GPTQConfig
|
|||||||
from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
|
from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
|
||||||
from vllm.model_executor.layers.quantization.gptq_marlin_24 import GPTQMarlin24Config
|
from vllm.model_executor.layers.quantization.gptq_marlin_24 import GPTQMarlin24Config
|
||||||
from vllm.model_executor.layers.quantization.marlin import MarlinConfig
|
from vllm.model_executor.layers.quantization.marlin import MarlinConfig
|
||||||
from vllm.model_executor.layers.quantization.modelopt import ModelOptFp8Config
|
|
||||||
from vllm.model_executor.layers.quantization.qqq import QQQConfig
|
from vllm.model_executor.layers.quantization.qqq import QQQConfig
|
||||||
from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig
|
from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig
|
||||||
|
|
||||||
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
||||||
from sglang.srt.layers.quantization.fp8 import Fp8Config
|
from sglang.srt.layers.quantization.fp8 import Fp8Config
|
||||||
|
from sglang.srt.layers.quantization.modelopt_quant import ModelOptFp8Config
|
||||||
|
|
||||||
QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
|
QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
|
||||||
"aqlm": AQLMConfig,
|
"aqlm": AQLMConfig,
|
||||||
|
|||||||
@@ -142,6 +142,7 @@ class ModelOptFp8LinearMethod(LinearMethodBase):
|
|||||||
data=torch.full(
|
data=torch.full(
|
||||||
(len(output_partition_sizes),),
|
(len(output_partition_sizes),),
|
||||||
torch.finfo(torch.float32).min,
|
torch.finfo(torch.float32).min,
|
||||||
|
dtype=torch.float32,
|
||||||
),
|
),
|
||||||
weight_loader=weight_loader,
|
weight_loader=weight_loader,
|
||||||
),
|
),
|
||||||
Reference in New Issue
Block a user