xc-llm-ascend/tests/multicard/test_ilama_lora_tp2.py

import pytest

from tests.conftest import VllmRunner
from tests.singlecard.test_ilama_lora import (EXPECTED_LORA_OUTPUT, MODEL_PATH,
                                              do_sample)


@pytest.mark.parametrize("distributed_executor_backend", ["mp"])
def test_ilama_lora_tp2(distributed_executor_backend, ilama_lora_files):
    """Check LoRA generation under two-way tensor parallelism.

    Starts a ``VllmRunner`` for ``MODEL_PATH`` with LoRA enabled and
    ``tensor_parallel_size=2``, samples through ``do_sample`` with
    ``lora_id=2``, and compares the result entry by entry against
    ``EXPECTED_LORA_OUTPUT`` (imported from the single-card LoRA test module).

    Args:
        distributed_executor_backend: Executor backend forwarded to
            ``VllmRunner``; parametrized with ``"mp"``.
        ilama_lora_files: Forwarded unchanged to ``do_sample``; presumably a
            pytest fixture supplying the LoRA adapter location -- confirm
            against the project's conftest.
    """
    with VllmRunner(model_name=MODEL_PATH,
                    enable_lora=True,
                    max_loras=4,
                    max_model_len=1024,
                    max_num_seqs=16,
                    tensor_parallel_size=2,
                    distributed_executor_backend=distributed_executor_backend
                    ) as vllm_model:
        output = do_sample(vllm_model.model, ilama_lora_files, lora_id=2)

    # The comparison runs after the runner context has exited. Only the first
    # len(EXPECTED_LORA_OUTPUT) entries of `output` are checked; a shorter
    # `output` surfaces as an IndexError rather than passing silently.
    for i, expected in enumerate(EXPECTED_LORA_OUTPUT):
        assert output[i] == expected, (
            f"LoRA output mismatch at index {i}: "
            f"got {output[i]!r}, expected {expected!r}")
[V1][LoRA][Test] V1 Engine LoRA support & e2e test (#893)

### What this PR does / why we need it?
Add V1 Engine LoRA support. Add LoRA e2e tests on a single card and on multiple cards.

### Does this PR introduce _any_ user-facing change?
Support LoRA for V1.

### How was this patch tested?
CI passed with the newly added tests.

---------

Signed-off-by: jesse <szxfml@gmail.com>
Signed-off-by: paulyu <paulyu0307@gmail.com>
Signed-off-by: paulyu12 <507435917@qq.com>
Co-authored-by: jesse <szxfml@gmail.com>
Co-authored-by: paulyu <paulyu0307@gmail.com>
2025-05-22 19:20:51 +08:00			`import pytest`

			`from tests.conftest import VllmRunner`
			`from tests.singlecard.test_ilama_lora import (EXPECTED_LORA_OUTPUT, MODEL_PATH,`
			`do_sample)`


			`@pytest.mark.parametrize("distributed_executor_backend", ["mp"])`
			`def test_ilama_lora_tp2(distributed_executor_backend, ilama_lora_files):`
			`with VllmRunner(model_name=MODEL_PATH,`
			`enable_lora=True,`
			`max_loras=4,`
			`max_model_len=1024,`
			`max_num_seqs=16,`
			`tensor_parallel_size=2,`
			`distributed_executor_backend=distributed_executor_backend`
			`) as vllm_model:`
			`output = do_sample(vllm_model.model, ilama_lora_files, lora_id=2)`

			`for i in range(len(EXPECTED_LORA_OUTPUT)):`
			`assert output[i] == EXPECTED_LORA_OUTPUT[i]`