diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py index 145edbbdf..14822c9e7 100644 --- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py @@ -86,7 +86,7 @@ class CompressedTensorsConfig(QuantizationConfig): sparsity_ignore_list: List[str], kv_cache_scheme: Optional[Dict[str, Any]] = None, config: Optional[Dict[str, Any]] = None, - packed_modules_mapping: Dict[str, List[str]] = {}, + packed_modules_mapping: Optional[Dict[str, List[str]]] = None, ): super().__init__() self.ignore = ignore @@ -97,7 +97,7 @@ class CompressedTensorsConfig(QuantizationConfig): self.sparsity_scheme_map = sparsity_scheme_map self.sparsity_ignore_list = sparsity_ignore_list self.config = config - self.packed_modules_mapping = packed_modules_mapping + self.packed_modules_mapping = packed_modules_mapping or {} def get_linear_method(self) -> CompressedTensorsLinearMethod: return CompressedTensorsLinearMethod(self) diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index 44ac84fbc..67fc8f608 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -492,7 +492,7 @@ def make_layers( pp_size: Optional[int] = None, prefix: str = "", return_tuple: bool = False, - offloader_kwargs: Dict[str, Any] = {}, + offloader_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[torch.nn.Module, int, int]: """Make a list of layers with the given layer function""" # circula imports @@ -517,7 +517,7 @@ def make_layers( layer_fn(idx=idx, prefix=add_prefix(idx, prefix)) for idx in range(start_layer, end_layer) ), - **offloader_kwargs, + **(offloader_kwargs or {}), ) + [ PPMissingLayer(return_tuple=return_tuple)