[Bugfix][LoRA] Fix bug introduced by upstream vllm#25249 (#3095)
### What this PR does / why we need it?
Fix the LoRA breakage introduced by upstream
https://github.com/vllm-project/vllm/pull/25249.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
- vLLM version: v0.10.2
- vLLM main:
9607d5eb44
---------
Signed-off-by: paulyu12 <507435917@qq.com>
This commit is contained in:
@@ -2614,11 +2614,16 @@ class NPUModelRunner(LoRAModelRunnerMixin):
                     self.model.get_eagle3_aux_hidden_state_layers())

             if self.lora_config:
-                self.model = self.load_lora_model(self.model,
-                                                  self.model_config,
-                                                  self.scheduler_config,
-                                                  self.lora_config,
-                                                  self.device)
+                if vllm_version_is("0.10.2"):
+                    # Old API (vLLM v0.10.2): load_lora_model takes the
+                    # individual config objects.
+                    self.model = self.load_lora_model(self.model,
+                                                      self.model_config,
+                                                      self.scheduler_config,
+                                                      self.lora_config,
+                                                      self.device)
+                else:
+                    # New API (post vllm#25249): load_lora_model takes the
+                    # unified vllm_config instead.
+                    self.model = self.load_lora_model(self.model,
+                                                      self.vllm_config,
+                                                      self.device)
             logger.info("Loading model weights took %.4f GB",
                         m.consumed_memory / float(2**30))
Reference in New Issue
Block a user