### What this PR does / why we need it?
| File Path |
| :--- |
| `tests/e2e/singlecard/compile/backend.py` |
| `tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py` |
| `tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py` |
| `tests/e2e/singlecard/compile/test_norm_quant_fusion.py` |
| `tests/e2e/singlecard/model_runner_v2/test_basic.py` |
| `tests/e2e/singlecard/test_aclgraph_accuracy.py` |
| `tests/e2e/singlecard/test_aclgraph_batch_invariant.py` |
| `tests/e2e/singlecard/test_aclgraph_mem.py` |
| `tests/e2e/singlecard/test_async_scheduling.py` |
| `tests/e2e/singlecard/test_auto_fit_max_mode_len.py` |
| `tests/e2e/singlecard/test_batch_invariant.py` |
| `tests/e2e/singlecard/test_camem.py` |
| `tests/e2e/singlecard/test_completion_with_prompt_embeds.py` |
| `tests/e2e/singlecard/test_cpu_offloading.py` |
| `tests/e2e/singlecard/test_guided_decoding.py` |
| `tests/e2e/singlecard/test_ilama_lora.py` |
| `tests/e2e/singlecard/test_llama32_lora.py` |
| `tests/e2e/singlecard/test_models.py` |
| `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py` |
| `tests/e2e/singlecard/test_quantization.py` |
| `tests/e2e/singlecard/test_qwen3_multi_loras.py` |
| `tests/e2e/singlecard/test_sampler.py` |
| `tests/e2e/singlecard/test_vlm.py` |
| `tests/e2e/singlecard/test_xlite.py` |
| `tests/e2e/singlecard/utils.py` |
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 9562912cea
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from unittest.mock import patch
|
||||
|
||||
from vllm import SamplingParams
|
||||
from vllm.lora.request import LoRARequest
|
||||
from unittest.mock import patch
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
from vllm_ascend.utils import enable_custom_op
|
||||
@@ -27,16 +28,11 @@ LORA_TEST_EXPECTED = [
|
||||
|
||||
def format_chatml_messages(prompt: str):
|
||||
return [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt
|
||||
},
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": prompt},
|
||||
]
|
||||
|
||||
|
||||
@patch.dict("os.environ", {"VLLM_USE_MODELSCOPE": "False"})
|
||||
def test_multi_loras_with_tp_sync():
|
||||
lora_name_id_map = {}
|
||||
@@ -102,9 +98,7 @@ def test_multi_loras_with_tp_sync():
|
||||
outputs = llm.chat(
|
||||
[messages],
|
||||
sampling_params,
|
||||
chat_template_kwargs={
|
||||
"enable_thinking": False
|
||||
}, # for those loras, ensure enable_thinking=False
|
||||
chat_template_kwargs={"enable_thinking": False}, # for those loras, ensure enable_thinking=False
|
||||
lora_request=lora_request,
|
||||
use_tqdm=False,
|
||||
)
|
||||
@@ -113,15 +107,13 @@ def test_multi_loras_with_tp_sync():
|
||||
|
||||
def reload_lora(name: str):
|
||||
"""
|
||||
reload a lora to simulate the case:
|
||||
setting `VLLM_ALLOW_RUNTIME_LORA_UPDATING=true`
|
||||
reload a lora to simulate the case:
|
||||
setting `VLLM_ALLOW_RUNTIME_LORA_UPDATING=true`
|
||||
for dynamic lora loading and unloading
|
||||
"""
|
||||
remove_lora_response = llm.llm_engine.remove_lora(
|
||||
lora_id=lora_name_id_map[name])
|
||||
remove_lora_response = llm.llm_engine.remove_lora(lora_id=lora_name_id_map[name])
|
||||
|
||||
add_lora_response = llm.llm_engine.add_lora(
|
||||
make_add_lora_request(name, LORA_NAME_PATH_MAP[name]))
|
||||
add_lora_response = llm.llm_engine.add_lora(make_add_lora_request(name, LORA_NAME_PATH_MAP[name]))
|
||||
|
||||
print(f"{remove_lora_response=}, {add_lora_response=}")
|
||||
|
||||
@@ -131,7 +123,6 @@ def test_multi_loras_with_tp_sync():
|
||||
assert outputs == expected
|
||||
|
||||
for prompt, expected_output in zip(LORA_TEST_PROMPTS, LORA_TEST_EXPECTED):
|
||||
|
||||
output_text = call_llm_get_outputs(prompt, "Alice")
|
||||
check_outputs(output_text, expected_output, prompt)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user