From 704467cd9ae3a5077637564113f00af2aad01161 Mon Sep 17 00:00:00 2001 From: yupeng <507435917@qq.com> Date: Mon, 22 Sep 2025 22:26:01 +0800 Subject: [PATCH] [Bugfix][LoRA] Fix bug introduced by upstream vllm#25249 (#3095) ### What this PR does / why we need it? Fix the impact on LoRA introduced by https://github.com/vllm-project/vllm/pull/25249. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? pytest -sv tests/e2e/singlecard/test_ilama_lora.py pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/9607d5eb449711b349d4c2bee0a9c94afcc7ed14 --------- Signed-off-by: paulyu12 <507435917@qq.com> --- vllm_ascend/worker/model_runner_v1.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 22a4c4b..9421a98 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -2614,11 +2614,16 @@ class NPUModelRunner(LoRAModelRunnerMixin): self.model.get_eagle3_aux_hidden_state_layers()) if self.lora_config: - self.model = self.load_lora_model(self.model, - self.model_config, - self.scheduler_config, - self.lora_config, - self.device) + if vllm_version_is("0.10.2"): + self.model = self.load_lora_model(self.model, + self.model_config, + self.scheduler_config, + self.lora_config, + self.device) + else: + self.model = self.load_lora_model(self.model, + self.vllm_config, + self.device) logger.info("Loading model weights took %.4f GB", m.consumed_memory / float(2**30))