### What this PR does / why we need it?
Reformats the single-card e2e tests to the project's current lint/format conventions (import ordering and line wrapping). There are no functional changes. The following files are touched:
| File Path |
| :--- |
| `tests/e2e/singlecard/compile/backend.py` |
| `tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py` |
| `tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py` |
| `tests/e2e/singlecard/compile/test_norm_quant_fusion.py` |
| `tests/e2e/singlecard/model_runner_v2/test_basic.py` |
| `tests/e2e/singlecard/test_aclgraph_accuracy.py` |
| `tests/e2e/singlecard/test_aclgraph_batch_invariant.py` |
| `tests/e2e/singlecard/test_aclgraph_mem.py` |
| `tests/e2e/singlecard/test_async_scheduling.py` |
| `tests/e2e/singlecard/test_auto_fit_max_mode_len.py` |
| `tests/e2e/singlecard/test_batch_invariant.py` |
| `tests/e2e/singlecard/test_camem.py` |
| `tests/e2e/singlecard/test_completion_with_prompt_embeds.py` |
| `tests/e2e/singlecard/test_cpu_offloading.py` |
| `tests/e2e/singlecard/test_guided_decoding.py` |
| `tests/e2e/singlecard/test_ilama_lora.py` |
| `tests/e2e/singlecard/test_llama32_lora.py` |
| `tests/e2e/singlecard/test_models.py` |
| `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py` |
| `tests/e2e/singlecard/test_quantization.py` |
| `tests/e2e/singlecard/test_qwen3_multi_loras.py` |
| `tests/e2e/singlecard/test_sampler.py` |
| `tests/e2e/singlecard/test_vlm.py` |
| `tests/e2e/singlecard/test_xlite.py` |
| `tests/e2e/singlecard/utils.py` |
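The change is mechanical. As a representative example (taken from the `test_llama32_lora.py` diff below), a yapf-style wrapped call collapses to a single line under the new formatter:

```python
import vllm

# Before: arguments aligned under the opening parenthesis (yapf style).
sampling_params = vllm.SamplingParams(temperature=0,
                                      max_tokens=64,
                                      stop=["<|im_end|>"])

# After: a single line, now within the allowed line length.
sampling_params = vllm.SamplingParams(temperature=0, max_tokens=64, stop=["<|im_end|>"])
```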
### Does this PR introduce _any_ user-facing change?
No. Only test code under `tests/e2e/singlecard/` is reformatted.
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 9562912cea
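
An illustrative local invocation of one of the reformatted suites (a sketch only; it assumes a working vllm-ascend environment with an NPU visible, and the flags are not taken from this PR):

```python
import pytest

# Run a single reformatted e2e suite; -s streams the test's print() output.
pytest.main(["-s", "tests/e2e/singlecard/test_llama32_lora.py"])
```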
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
The diff below is from `tests/e2e/singlecard/test_llama32_lora.py`:

```diff
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import pytest
+from unittest.mock import patch
 
+import pytest
 import vllm
 import vllm.config
 from vllm.lora.request import LoRARequest
-from unittest.mock import patch
 
 from tests.e2e.conftest import VllmRunner
 from vllm_ascend.utils import enable_custom_op
@@ -53,17 +53,12 @@ def do_sample(
         PROMPT_TEMPLATE.format(context="How many candidates are there?"),
         PROMPT_TEMPLATE.format(context="Count the number of candidates."),
         PROMPT_TEMPLATE.format(
-            context=
-            "Which poll resource provided the most number of candidate information?"  # noqa: E501
+            context="Which poll resource provided the most number of candidate information?"  # noqa: E501
         ),
-        PROMPT_TEMPLATE.format(
-            context=
-            "Return the poll resource associated with the most candidates."),
+        PROMPT_TEMPLATE.format(context="Return the poll resource associated with the most candidates."),
     ]
 
-    sampling_params = vllm.SamplingParams(temperature=0,
-                                          max_tokens=64,
-                                          stop=["<|im_end|>"])
+    sampling_params = vllm.SamplingParams(temperature=0, max_tokens=64, stop=["<|im_end|>"])
     if tensorizer_config_dict is not None:
         outputs = llm.generate(
             prompts,
@@ -73,14 +68,15 @@ def do_sample(
                 lora_id,
                 lora_path,
                 tensorizer_config_dict=tensorizer_config_dict,
-            ) if lora_id else None,
+            )
+            if lora_id
+            else None,
         )
     else:
         outputs = llm.generate(
             prompts,
             sampling_params,
-            lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
-            if lora_id else None,
+            lora_request=LoRARequest(str(lora_id), lora_id, lora_path) if lora_id else None,
         )
 
     generated_texts: list[str] = []
@@ -92,33 +88,40 @@ def do_sample(
     return generated_texts
 
 
-def generate_and_test(llm,
-                      llama32_lora_files,
-                      tensorizer_config_dict: dict | None = None):
+def generate_and_test(llm, llama32_lora_files, tensorizer_config_dict: dict | None = None):
     print("lora adapter created")
     print("lora 1")
-    assert (do_sample(
-        llm,
-        llama32_lora_files,
-        tensorizer_config_dict=tensorizer_config_dict,
-        lora_id=1,
-    ) == EXPECTED_LORA_OUTPUT)
+    assert (
+        do_sample(
+            llm,
+            llama32_lora_files,
+            tensorizer_config_dict=tensorizer_config_dict,
+            lora_id=1,
+        )
+        == EXPECTED_LORA_OUTPUT
+    )
 
     print("lora 2")
-    assert (do_sample(
-        llm,
-        llama32_lora_files,
-        tensorizer_config_dict=tensorizer_config_dict,
-        lora_id=2,
-    ) == EXPECTED_LORA_OUTPUT)
+    assert (
+        do_sample(
+            llm,
+            llama32_lora_files,
+            tensorizer_config_dict=tensorizer_config_dict,
+            lora_id=2,
+        )
+        == EXPECTED_LORA_OUTPUT
+    )
 
     print("base model")
-    assert (do_sample(
-        llm,
-        llama32_lora_files,
-        tensorizer_config_dict=tensorizer_config_dict,
-        lora_id=0,
-    ) == EXPECTED_BASE_MODEL_OUTPUT)
+    assert (
+        do_sample(
+            llm,
+            llama32_lora_files,
+            tensorizer_config_dict=tensorizer_config_dict,
+            lora_id=0,
+        )
+        == EXPECTED_BASE_MODEL_OUTPUT
+    )
 
     print("removing lora")
```
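For context, the assertions above exercise vLLM's LoRA generation path. A minimal sketch of that pattern, assuming a LoRA-enabled engine (the model name and adapter path are illustrative placeholders, not taken from this PR):

```python
import vllm
from vllm.lora.request import LoRARequest

# Minimal sketch of the generate-with-LoRA pattern the test wraps.
# Model and adapter paths below are hypothetical placeholders.
llm = vllm.LLM(model="meta-llama/Llama-3.2-1B-Instruct", enable_lora=True)
sampling_params = vllm.SamplingParams(temperature=0, max_tokens=64, stop=["<|im_end|>"])
outputs = llm.generate(
    ["Hello"],
    sampling_params,
    # Passing lora_request=None (the test's lora_id == 0 case) runs the base model.
    lora_request=LoRARequest("my-adapter", 1, "/path/to/lora"),
)
print(outputs[0].outputs[0].text)
```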