[V1][LoRA][Test] V1 Engine LoRA support & e2e test (#893)

### What this PR does / why we need it? Add V1Engine LoRA support. Add LoRA e2e test on single card and multiple cards. ### Does this PR introduce _any_ user-facing change? support lora for V1 ### How was this patch tested? CI passed with new added test --------- Signed-off-by: jesse <szxfml@gmail.com> Signed-off-by: paulyu <paulyu0307@gmail.com> Signed-off-by: paulyu12 <507435917@qq.com> Co-authored-by: jesse <szxfml@gmail.com> Co-authored-by: paulyu <paulyu0307@gmail.com>
2025-05-22 19:20:51 +08:00
parent 7aa4f85f10
commit 0f53b138f6
6 changed files with 167 additions and 38 deletions
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -31,6 +31,7 @@ from vllm.distributed import (ensure_model_parallel_initialized,
                              set_custom_all_reduce)
 from vllm.distributed.kv_transfer import ensure_kv_transfer_initialized
 from vllm.logger import logger
+from vllm.lora.request import LoRARequest
 from vllm.model_executor import set_random_seed
 from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
 from vllm.v1.core.sched.output import SchedulerOutput
@@ -216,6 +217,18 @@ class NPUWorker(WorkerBase):
        else:
            self.profiler.stop()

+    def add_lora(self, lora_request: LoRARequest) -> bool:
+        return self.model_runner.add_lora(lora_request)
+
+    def remove_lora(self, lora_id: int) -> bool:
+        return self.model_runner.remove_lora(lora_id)
+
+    def list_loras(self) -> set[int]:
+        return self.model_runner.list_loras()
+
+    def pin_lora(self, lora_id: int) -> bool:
+        return self.model_runner.pin_lora(lora_id)
+
    def execute_dummy_batch(self) -> None:
        self.model_runner._dummy_run(1)