### What this PR does / why we need it?
Reformats the e2e single-card test suite to the repository's current code style: wrapped expressions are collapsed onto single lines, single quotes become double quotes, and continuation indentation is normalized. There are no functional changes. Affected files:
| File Path |
| :--- |
| `tests/e2e/singlecard/compile/backend.py` |
| `tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py` |
| `tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py` |
| `tests/e2e/singlecard/compile/test_norm_quant_fusion.py` |
| `tests/e2e/singlecard/model_runner_v2/test_basic.py` |
| `tests/e2e/singlecard/test_aclgraph_accuracy.py` |
| `tests/e2e/singlecard/test_aclgraph_batch_invariant.py` |
| `tests/e2e/singlecard/test_aclgraph_mem.py` |
| `tests/e2e/singlecard/test_async_scheduling.py` |
| `tests/e2e/singlecard/test_auto_fit_max_mode_len.py` |
| `tests/e2e/singlecard/test_batch_invariant.py` |
| `tests/e2e/singlecard/test_camem.py` |
| `tests/e2e/singlecard/test_completion_with_prompt_embeds.py` |
| `tests/e2e/singlecard/test_cpu_offloading.py` |
| `tests/e2e/singlecard/test_guided_decoding.py` |
| `tests/e2e/singlecard/test_ilama_lora.py` |
| `tests/e2e/singlecard/test_llama32_lora.py` |
| `tests/e2e/singlecard/test_models.py` |
| `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py` |
| `tests/e2e/singlecard/test_quantization.py` |
| `tests/e2e/singlecard/test_qwen3_multi_loras.py` |
| `tests/e2e/singlecard/test_sampler.py` |
| `tests/e2e/singlecard/test_vlm.py` |
| `tests/e2e/singlecard/test_xlite.py` |
| `tests/e2e/singlecard/utils.py` |
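
Because the change is format-only, one quick sanity check (not part of this PR; the two sources below are invented stand-ins) is to confirm that the old and new versions of a file parse to the same AST:

```python
# Hedged sketch: verify a pure reformat by comparing ASTs.
# The two sources are illustrative stand-ins, not code from this PR.
import ast


def same_ast(old_source: str, new_source: str) -> bool:
    """True if the two sources are structurally identical Python."""
    return ast.dump(ast.parse(old_source)) == ast.dump(ast.parse(new_source))


old = 'x = ("a" +\n     "b")\n'  # yapf-style wrapped form
new = 'x = "a" + "b"\n'          # collapsed single-line form
print(same_ast(old, new))  # True: only the layout changed
```

`ast.dump` ignores layout, parentheses, and line breaks, so any difference it reports would indicate a real behavioral edit rather than a reformat.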
### Does this PR introduce _any_ user-facing change?
No. Only test code under `tests/e2e/singlecard/` is reformatted.
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 9562912cea
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
In `tests/e2e/singlecard/test_async_scheduling.py`:

```diff
@@ -15,8 +15,7 @@ from tests.e2e.model_utils import check_outputs_equal
 MODEL = "Qwen/Qwen3-0.6B"
 MTP_MODEL = "wemaster/deepseek_mtp_main_random_bf16"
 
-first_prompt = ("The following numbers of the sequence " +
-                ", ".join(str(i) for i in range(10)) + " are:")
+first_prompt = "The following numbers of the sequence " + ", ".join(str(i) for i in range(10)) + " are:"
 example_prompts = [
     "Hello, my name is",
     "The president of the United States is",
```
```diff
@@ -31,7 +30,9 @@ default_params = dict(
 )
 
 
-def test_without_spec_decoding(monkeypatch: pytest.MonkeyPatch, ):
+def test_without_spec_decoding(
+    monkeypatch: pytest.MonkeyPatch,
+):
     """Test consistency of combos of async scheduling, preemption,
     uni/multiproc executor, prefill chunking."""
     test_sampling_params: list[dict[str, Any]] = [
```
```diff
@@ -85,11 +86,11 @@ def run_tests(
     # avoid precision errors
     outputs: list[tuple[str, list, list]] = []
     for n, (
-            test_preemption,
-            executor,
-            async_scheduling,
-            spec_config,
-            test_prefill_chunking,
+        test_preemption,
+        executor,
+        async_scheduling,
+        spec_config,
+        test_prefill_chunking,
     ) in enumerate(test_configs, 1):
         test_str = f"{n}/{len(test_configs)}"
         test_results = run_test(
```
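The reformatted loop keeps the one-name-per-line tuple unpacking but re-indents it, and `enumerate(test_configs, 1)` numbers the configs from 1 for the progress string. A standalone miniature of the pattern, with invented two-entry configs:

```python
# Miniature of the loop above; the config tuples are invented.
test_configs = [
    (False, "mp", False),
    (True, "uni", True),
]
for n, (test_preemption, executor, async_scheduling) in enumerate(test_configs, 1):
    print(f"{n}/{len(test_configs)}: preemption={test_preemption}, "
          f"executor={executor}, async={async_scheduling}")
```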
```diff
@@ -105,21 +106,18 @@ def run_tests(
         outputs.append(test_results)
 
     baseline_config, baseline_tests, _ = outputs[0]
-    _, _, baseline_acceptances = next((o for o in outputs if o[2] is not None),
-                                      (None, None, None))
+    _, _, baseline_acceptances = next((o for o in outputs if o[2] is not None), (None, None, None))
 
-    print(
-        f"BASELINE: config=[{baseline_config}], accept_rates={baseline_acceptances}"
-    )
+    print(f"BASELINE: config=[{baseline_config}], accept_rates={baseline_acceptances}")
 
     failure = None
     for test_config, test_outputs, test_acceptance_rates in outputs[1:]:
         for base_outs, base_acceptance_rate, test_outs, test_acceptance_rate, params in zip(
-                baseline_tests,
-                baseline_acceptances or repeat(None),
-                test_outputs,
-                test_acceptance_rates or repeat(None),
-                test_sampling_params,
+            baseline_tests,
+            baseline_acceptances or repeat(None),
+            test_outputs,
+            test_acceptance_rates or repeat(None),
+            test_sampling_params,
         ):
             try:
                 check_outputs_equal(
```
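The collapsed `next(...)` call picks the first output triple that actually recorded acceptance rates, falling back to `(None, None, None)` when no spec-decoding config ran, so the tuple unpacking still succeeds. A toy demonstration with made-up data:

```python
# next() with a default, as used above; the data is invented.
outputs = [
    ("config-a", ["out"], None),        # no acceptance rates recorded
    ("config-b", ["out"], [0.7, 0.8]),  # first entry with rates
]
_, _, baseline_acceptances = next(
    (o for o in outputs if o[2] is not None), (None, None, None)
)
print(baseline_acceptances)  # [0.7, 0.8]

# With no matching entry, the default keeps the unpacking from failing:
_, _, none_rates = next((o for o in [] if o[2] is not None), (None, None, None))
print(none_rates)  # None
```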
```diff
@@ -129,21 +127,18 @@ def run_tests(
                     name_1=f"config=[{test_config}], params={params}",
                 )
 
-                if (base_acceptance_rate is not None
-                        and test_acceptance_rate is not None):
+                if base_acceptance_rate is not None and test_acceptance_rate is not None:
                     if "spec_mml=None" in test_config:
-                        assert (test_acceptance_rate > base_acceptance_rate
-                                or test_acceptance_rate == pytest.approx(
-                                    base_acceptance_rate, rel=5e-2))
+                        assert test_acceptance_rate > base_acceptance_rate or test_acceptance_rate == pytest.approx(
+                            base_acceptance_rate, rel=5e-2
+                        )
                     else:
                         # Currently the reported acceptance rate is expected to be
                         # lower when we sometimes skip drafting altogether.
                         assert test_acceptance_rate > 0.1
-                print(f"PASSED: config=[{test_config}], params={params}"
-                      f" accept_rate={test_acceptance_rate}")
+                print(f"PASSED: config=[{test_config}], params={params} accept_rate={test_acceptance_rate}")
             except AssertionError as e:
-                print(f"FAILED: config=[{test_config}], params={params}"
-                      f" accept_rate={test_acceptance_rate}")
+                print(f"FAILED: config=[{test_config}], params={params} accept_rate={test_acceptance_rate}")
                 if failure is None:
                     failure = e
 
```
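The reflowed assertion passes when the new acceptance rate either beats the baseline or lands within 5% relative tolerance of it; `pytest.approx(x, rel=5e-2)` accepts values within `0.05 * |x|`:

```python
# How the rel=5e-2 tolerance behaves, standalone; the numbers are invented.
import pytest

base = 0.80
assert 0.78 == pytest.approx(base, rel=5e-2)        # off by 0.02 <= 0.04 -> passes
assert not (0.75 == pytest.approx(base, rel=5e-2))  # off by 0.05 >  0.04 -> fails
print("tolerance behaves as expected")
```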
```diff
@@ -161,33 +156,35 @@ def run_test(
     spec_config: dict[str, Any] | None,
     test_prefill_chunking: bool,
 ):
-    os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
+    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
     spec_decoding = spec_config is not None
     cache_arg: dict[str, Any] = (
         # Force preemptions
-        dict(num_gpu_blocks_override=2) if test_preemption else dict(
-            gpu_memory_utilization=0.9))
+        dict(num_gpu_blocks_override=2) if test_preemption else dict(gpu_memory_utilization=0.9)
+    )
     spec_mml = (spec_config or {}).get("max_model_len")
-    test_config = (f"executor={executor}, preemption={test_preemption}, "
-                   f"async_sched={async_scheduling}, "
-                   f"chunk_prefill={test_prefill_chunking}, "
-                   f"spec_decoding={spec_decoding}, spec_mml={spec_mml}")
+    test_config = (
+        f"executor={executor}, preemption={test_preemption}, "
+        f"async_sched={async_scheduling}, "
+        f"chunk_prefill={test_prefill_chunking}, "
+        f"spec_decoding={spec_decoding}, spec_mml={spec_mml}"
+    )
     print("-" * 80)
     print(f"---- TESTING {test_str}: {test_config}")
     print("-" * 80)
     with VllmRunner(
-            model,
-            max_model_len=512,
-            enable_chunked_prefill=test_prefill_chunking,
-            # Force prefill chunking
-            max_num_batched_tokens=48 if test_prefill_chunking else None,
-            enforce_eager=True,
-            async_scheduling=async_scheduling,
-            distributed_executor_backend=executor,
-            dtype="float16",  # avoid precision errors
-            speculative_config=spec_config,
-            disable_log_stats=False,
-            **cache_arg,
+        model,
+        max_model_len=512,
+        enable_chunked_prefill=test_prefill_chunking,
+        # Force prefill chunking
+        max_num_batched_tokens=48 if test_prefill_chunking else None,
+        enforce_eager=True,
+        async_scheduling=async_scheduling,
+        distributed_executor_backend=executor,
+        dtype="float16",  # avoid precision errors
+        speculative_config=spec_config,
+        disable_log_stats=False,
+        **cache_arg,
     ) as vllm_model:
         results = []
         acceptance_rates: list[float] | None = [] if spec_decoding else None
```
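`cache_arg` is a single-key dict selected by the preemption flag and splatted into the runner via `**cache_arg`, so exactly one of the two mutually exclusive cache settings is passed. A runnable toy version of the pattern (`runner` is a stand-in for `VllmRunner`):

```python
# Toy version of the **cache_arg pattern; runner stands in for VllmRunner.
from typing import Any


def runner(**kwargs: Any) -> dict[str, Any]:
    """Stand-in that just echoes the kwargs it received."""
    return kwargs


for test_preemption in (True, False):
    cache_arg: dict[str, Any] = (
        # A tiny KV cache forces preemptions; otherwise use normal utilization
        dict(num_gpu_blocks_override=2) if test_preemption else dict(gpu_memory_utilization=0.9)
    )
    print(runner(max_model_len=512, **cache_arg))
```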
```diff
@@ -197,26 +194,23 @@ def run_test(
             results.append(
                 vllm_model.generate(
                     example_prompts,
-                    sampling_params=SamplingParams(**default_params,
-                                                   **override_params),
-                ))
+                    sampling_params=SamplingParams(**default_params, **override_params),
+                )
+            )
             metrics_after = vllm_model.model.get_metrics()
             if acceptance_rates is not None:
-                acceptance_rate = _get_acceptance_rate(metrics_before,
-                                                       metrics_after)
+                acceptance_rate = _get_acceptance_rate(metrics_before, metrics_after)
                 acceptance_rates.append(acceptance_rate)
                 print(f"ACCEPTANCE RATE {acceptance_rate}")
 
         if test_preemption:
-            preemptions = _get_count(metrics_before, metrics_after,
-                                     "vllm:num_preemptions")
+            preemptions = _get_count(metrics_before, metrics_after, "vllm:num_preemptions")
             assert preemptions > 0, "preemption test had no preemptions"
 
         if len(results) > 1:
             # First check that the different parameter configs
             # actually result in different output.
-            for other_test_outs, params in zip(results[1:],
-                                               sampling_param_tests[1:]):
+            for other_test_outs, params in zip(results[1:], sampling_param_tests[1:]):
                 with pytest.raises(AssertionError):
                     check_outputs_equal(
                         outputs_0_lst=results[0][0],
```
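The final sanity check inverts `check_outputs_equal`: wrapping it in `pytest.raises(AssertionError)` asserts that different sampling parameters really do produce different outputs, so the later equality checks are meaningful. The inversion pattern in isolation (`check_equal` stands in for `check_outputs_equal`):

```python
# pytest.raises(AssertionError) passes only if the wrapped check fails.
import pytest


def check_equal(a, b):
    """Stand-in for check_outputs_equal: a plain equality assertion."""
    assert a == b, f"{a!r} != {b!r}"


with pytest.raises(AssertionError):
    check_equal(["greedy output"], ["sampled output"])  # differ -> raises -> passes
print("outputs confirmed different")
```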