[Bugfix][LoRA] Fix bug introduced by upstream vllm#25249 (#3095)
### What this PR does / why we need it?
Fix the LoRA breakage introduced by upstream
https://github.com/vllm-project/vllm/pull/25249.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
- vLLM version: v0.10.2
- vLLM main:
9607d5eb44
---------
Signed-off-by: paulyu12 <507435917@qq.com>
This commit is contained in:
@@ -2614,11 +2614,16 @@ class NPUModelRunner(LoRAModelRunnerMixin):
                     self.model.get_eagle3_aux_hidden_state_layers())

             if self.lora_config:
-                self.model = self.load_lora_model(self.model,
-                                                  self.model_config,
-                                                  self.scheduler_config,
-                                                  self.lora_config,
-                                                  self.device)
+                if vllm_version_is("0.10.2"):
+                    # Old API (vLLM v0.10.2): load_lora_model takes the
+                    # individual config objects.
+                    self.model = self.load_lora_model(self.model,
+                                                      self.model_config,
+                                                      self.scheduler_config,
+                                                      self.lora_config,
+                                                      self.device)
+                else:
+                    # New API (post vllm#25249): load_lora_model takes the
+                    # unified vllm_config instead.
+                    self.model = self.load_lora_model(self.model,
+                                                      self.vllm_config,
+                                                      self.device)
             logger.info("Loading model weights took %.4f GB",
                         m.consumed_memory / float(2**30))
Reference in New Issue
Block a user