From 704467cd9ae3a5077637564113f00af2aad01161 Mon Sep 17 00:00:00 2001 From: yupeng <507435917@qq.com> Date: Mon, 22 Sep 2025 22:26:01 +0800 Subject: [PATCH] [Bugfix][LoRA] Fix bug introduced by upstream vllm#25249 (#3095) ### What this PR does / why we need it? Fix the impact on LoRA introduced by https://github.com/vllm-project/vllm/pull/25249. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? pytest -sv tests/e2e/singlecard/test_ilama_lora.py pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/9607d5eb449711b349d4c2bee0a9c94afcc7ed14 --------- Signed-off-by: paulyu12 <507435917@qq.com> --- vllm_ascend/worker/model_runner_v1.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 22a4c4b..9421a98 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -2614,11 +2614,16 @@ class NPUModelRunner(LoRAModelRunnerMixin): self.model.get_eagle3_aux_hidden_state_layers()) if self.lora_config: - self.model = self.load_lora_model(self.model, - self.model_config, - self.scheduler_config, - self.lora_config, - self.device) + if vllm_version_is("0.10.2"): + self.model = self.load_lora_model(self.model, + self.model_config, + self.scheduler_config, + self.lora_config, + self.device) + else: + self.model = self.load_lora_model(self.model, + self.vllm_config, + self.device) logger.info("Loading model weights took %.4f GB", m.consumed_memory / float(2**30))