Tiny refactor weight loading logic (#5232)

2025-05-08 16:02:56 +08:00
parent b6cf3532b5
commit 6450c1228c
2 changed files with 19 additions and 17 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -557,12 +557,7 @@ class ModelRunner:
            return iter
        def model_load_weights(model, iter):
-            model.load_weights(iter)
+            DefaultModelLoader.load_weights_and_postprocess(model, iter, target_device)
-            for _, module in self.model.named_modules():
-                quant_method = getattr(module, "quant_method", None)
-                if quant_method is not None:
-                    with device_loading_context(module, target_device):
-                        quant_method.process_weights_after_loading(module)
            return model
        with set_default_torch_dtype(self.model_config.dtype):
--- a/python/sglang/srt/model_loader/loader.py
+++ b/python/sglang/srt/model_loader/loader.py
@@ -374,7 +374,15 @@ class DefaultModelLoader(BaseModelLoader):
                    self.load_config,
                )
-            model.load_weights(self._get_all_weights(model_config, model))
+            self.load_weights_and_postprocess(
+                model, self._get_all_weights(model_config, model), target_device
+            )
+        return model.eval()
+    @staticmethod
+    def load_weights_and_postprocess(model, weights, target_device):
+        model.load_weights(weights)
        for _, module in model.named_modules():
            quant_method = getattr(module, "quant_method", None)
@@ -386,7 +394,6 @@ class DefaultModelLoader(BaseModelLoader):
                # parameters onto device for processing and back off after.
                with device_loading_context(module, target_device):
                    quant_method.process_weights_after_loading(module)
-        return model.eval()
 class LayeredModelLoader(DefaultModelLoader):