[V1][LoRA][Test] V1 Engine LoRA support & e2e test (#893)

### What this PR does / why we need it? Add V1 Engine LoRA support. Add LoRA e2e tests on a single card and on multiple cards. ### Does this PR introduce _any_ user-facing change? Yes: LoRA is now supported on the V1 engine. ### How was this patch tested? CI passed with the newly added tests. --------- Signed-off-by: jesse <szxfml@gmail.com> Signed-off-by: paulyu <paulyu0307@gmail.com> Signed-off-by: paulyu12 <507435917@qq.com> Co-authored-by: jesse <szxfml@gmail.com> Co-authored-by: paulyu <paulyu0307@gmail.com>
2025-05-22 19:20:51 +08:00
parent 7aa4f85f10
commit 0f53b138f6
6 changed files with 167 additions and 38 deletions
--- a/tests/multicard/test_ilama_lora_tp2.py
+++ b/tests/multicard/test_ilama_lora_tp2.py
@@ -0,0 +1,21 @@
+import pytest
+
+from tests.conftest import VllmRunner
+from tests.singlecard.test_ilama_lora import (EXPECTED_LORA_OUTPUT, MODEL_PATH,
+                                              do_sample)
+
+
+@pytest.mark.parametrize("distributed_executor_backend", ["mp"])
+def test_ilama_lora_tp2(distributed_executor_backend, ilama_lora_files):
+    with VllmRunner(model_name=MODEL_PATH,
+                    enable_lora=True,
+                    max_loras=4,
+                    max_model_len=1024,
+                    max_num_seqs=16,
+                    tensor_parallel_size=2,
+                    distributed_executor_backend=distributed_executor_backend
+                    ) as vllm_model:
+        output = do_sample(vllm_model.model, ilama_lora_files, lora_id=2)
+
+    for i in range(len(EXPECTED_LORA_OUTPUT)):
+        assert output[i] == EXPECTED_LORA_OUTPUT[i]