[V1][LoRA][Test] V1 Engine LoRA support & e2e test (#893)
### What this PR does / why we need it?
Adds LoRA support to the V1 engine, together with LoRA end-to-end tests for both a single card and multiple cards.

### Does this PR introduce _any_ user-facing change?
Yes: LoRA is now supported on the V1 engine.

### How was this patch tested?
CI passed with the newly added tests.

---------

Signed-off-by: jesse <szxfml@gmail.com>
Signed-off-by: paulyu <paulyu0307@gmail.com>
Signed-off-by: paulyu12 <507435917@qq.com>
Co-authored-by: jesse <szxfml@gmail.com>
Co-authored-by: paulyu <paulyu0307@gmail.com>
This commit is contained in:
21
tests/multicard/test_ilama_lora_tp2.py
Normal file
21
tests/multicard/test_ilama_lora_tp2.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import pytest
|
||||
|
||||
from tests.conftest import VllmRunner
|
||||
from tests.singlecard.test_ilama_lora import (EXPECTED_LORA_OUTPUT, MODEL_PATH,
|
||||
do_sample)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("distributed_executor_backend", ["mp"])
def test_ilama_lora_tp2(distributed_executor_backend, ilama_lora_files):
    """Check LoRA generation with tensor parallelism across two devices.

    A ``VllmRunner`` is started for ``MODEL_PATH`` with LoRA enabled and
    ``tensor_parallel_size=2``; ``do_sample`` is then called with
    ``lora_id=2`` and each returned entry is compared against the matching
    entry of ``EXPECTED_LORA_OUTPUT``.

    Args:
        distributed_executor_backend: Backend name forwarded to the runner
            (parametrized; currently only ``"mp"``).
        ilama_lora_files: Value handed straight to ``do_sample`` --
            presumably a pytest fixture with the LoRA adapter location;
            confirm against the conftest.
    """
    runner_kwargs = {
        "model_name": MODEL_PATH,
        "enable_lora": True,
        "max_loras": 4,
        "max_model_len": 1024,
        "max_num_seqs": 16,
        "tensor_parallel_size": 2,
        "distributed_executor_backend": distributed_executor_backend,
    }

    with VllmRunner(**runner_kwargs) as runner:
        generated = do_sample(runner.model, ilama_lora_files, lora_id=2)

    # Walk the expected list (not the generated one) so that exactly the
    # reference entries are checked, position by position.
    for idx, expected in enumerate(EXPECTED_LORA_OUTPUT):
        assert generated[idx] == expected
|
||||
Reference in New Issue
Block a user