[Lint]Style: Convert test/ to ruff format(Batch #1) (#6738)

### What this PR does / why we need it?

**Scope of Changes**:

| File Path |
| :--- |
| `tests/e2e/310p/multicard/test_vl_model_multicard.py` |
| `tests/e2e/310p/singlecard/test_vl_model_singlecard.py` |
| `tests/e2e/310p/test_utils.py` |
| `tests/e2e/conftest.py` |
| `tests/e2e/model_utils.py` |
| `tests/e2e/models/conftest.py` |
| `tests/e2e/models/test_lm_eval_correctness.py` |
| `tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py` |
| `tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py` |
| `tests/e2e/multicard/2-cards/test_data_parallel.py` |
| `tests/e2e/multicard/2-cards/test_disaggregated_encoder.py` |
| `tests/e2e/multicard/2-cards/test_expert_parallel.py` |
| `tests/e2e/multicard/2-cards/test_external_launcher.py` |
| `tests/e2e/multicard/2-cards/test_full_graph_mode.py` |
| `tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py` |
| `tests/e2e/multicard/2-cards/test_offline_inference_distributed.py` |
| `tests/e2e/multicard/2-cards/test_offline_weight_load.py` |
| `tests/e2e/multicard/2-cards/test_pipeline_parallel.py` |
| `tests/e2e/multicard/2-cards/test_prefix_caching.py` |
| `tests/e2e/multicard/2-cards/test_quantization.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe_routing_replay.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_performance.py` |
| `tests/e2e/multicard/2-cards/test_shared_expert_dp.py` |
| `tests/e2e/multicard/2-cards/test_single_request_aclgraph.py` |
| `tests/e2e/multicard/2-cards/test_sp_pass.py` |

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 9562912cea

Signed-off-by: MrZ20 <2609716663@qq.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
2026-03-10 09:52:50 +08:00
parent 9216e1b050
commit 43df2cb2fc
27 changed files with 753 additions and 859 deletions
--- a/tests/e2e/multicard/2-cards/test_prefix_caching.py
+++ b/tests/e2e/multicard/2-cards/test_prefix_caching.py
@@ -11,11 +11,14 @@ MODELS = [
    # for MHA
    "Qwen/Qwen3-8B",
    # for MLA
-    "deepseek-ai/DeepSeek-V2-Lite-Chat"
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
 ]

 # A prompt containing a large markdown table. The table is randomly generated by GPT-4.
-LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n" + """
+# ruff: noqa: E501
+LONG_PROMPT = (
+    "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n"
+    + """
 | ID  | Name          | Age | Occupation    | Country       | Email                  | Phone Number   | Address                       |
 |-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
 | 1   | John Doe      | 29  | Engineer      | USA           | john.doe@example.com   | 555-1234       | 123 Elm St, Springfield, IL  |
@@ -49,32 +52,34 @@ LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables i
 | 29  | Amy White     | 33  | Musician      | New Zealand   | amy.w@example.com      | 555-5658       | 159 Maple St, Wellington, NZ |
 | 30  | Ben Black     | 38  | Chef          | Ireland       | ben.b@example.com      | 555-7870       | 246 Fir St, Waterford, IE    |
 """
+)

 INPUT_PROMPTS = [
-    LONG_PROMPT +
-    "Question: what is the age of John Doe? Your answer: The age of John Doe is ",
-    LONG_PROMPT +
-    "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is "
+    LONG_PROMPT + "Question: what is the age of John Doe? Your answer: The age of John Doe is ",
+    LONG_PROMPT + "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is ",
 ]


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [50])
 def test_models_prefix_cache_tp2(model: str, max_tokens: int) -> None:
-    with VllmRunner(model,
-                    max_model_len=2048,
-                    tensor_parallel_size=2,
-                    cudagraph_capture_sizes=[1, 2, 4, 8],
-                    gpu_memory_utilization=0.7) as vllm_model:
-        prefix_cache_output = vllm_model.generate_greedy(
-            INPUT_PROMPTS, max_tokens)
+    with VllmRunner(
+        model,
+        max_model_len=2048,
+        tensor_parallel_size=2,
+        cudagraph_capture_sizes=[1, 2, 4, 8],
+        gpu_memory_utilization=0.7,
+    ) as vllm_model:
+        prefix_cache_output = vllm_model.generate_greedy(INPUT_PROMPTS, max_tokens)

-    with VllmRunner(model,
-                    enable_prefix_caching=False,
-                    max_model_len=2048,
-                    tensor_parallel_size=2,
-                    cudagraph_capture_sizes=[1, 2, 4, 8],
-                    gpu_memory_utilization=0.7) as vllm_model:
+    with VllmRunner(
+        model,
+        enable_prefix_caching=False,
+        max_model_len=2048,
+        tensor_parallel_size=2,
+        cudagraph_capture_sizes=[1, 2, 4, 8],
+        gpu_memory_utilization=0.7,
+    ) as vllm_model:
        vllm_output = vllm_model.generate_greedy(INPUT_PROMPTS, max_tokens)

    check_outputs_equal(