diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 22a4c4b..9421a98 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -2614,11 +2614,16 @@ class NPUModelRunner(LoRAModelRunnerMixin): self.model.get_eagle3_aux_hidden_state_layers()) if self.lora_config: - self.model = self.load_lora_model(self.model, - self.model_config, - self.scheduler_config, - self.lora_config, - self.device) + if vllm_version_is("0.10.2"): + self.model = self.load_lora_model(self.model, + self.model_config, + self.scheduler_config, + self.lora_config, + self.device) + else: + self.model = self.load_lora_model(self.model, + self.vllm_config, + self.device) logger.info("Loading model weights took %.4f GB", m.consumed_memory / float(2**30))