[Lint] Style: Convert test/ to ruff format (Batch #1) (#6738)

### What this PR does / why we need it?
**Scope of Changes**:
| File Path |
| :--- |
| `tests/e2e/310p/multicard/test_vl_model_multicard.py` |
| `tests/e2e/310p/singlecard/test_vl_model_singlecard.py` |
| `tests/e2e/310p/test_utils.py` |
| `tests/e2e/conftest.py` |
| `tests/e2e/model_utils.py` |
| `tests/e2e/models/conftest.py` |
| `tests/e2e/models/test_lm_eval_correctness.py` |
| `tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py` |
| `tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py` |
| `tests/e2e/multicard/2-cards/test_data_parallel.py` |
| `tests/e2e/multicard/2-cards/test_disaggregated_encoder.py` |
| `tests/e2e/multicard/2-cards/test_expert_parallel.py` |
| `tests/e2e/multicard/2-cards/test_external_launcher.py` |
| `tests/e2e/multicard/2-cards/test_full_graph_mode.py` |
| `tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py` |
| `tests/e2e/multicard/2-cards/test_offline_inference_distributed.py` |
| `tests/e2e/multicard/2-cards/test_offline_weight_load.py` |
| `tests/e2e/multicard/2-cards/test_pipeline_parallel.py` |
| `tests/e2e/multicard/2-cards/test_prefix_caching.py` |
| `tests/e2e/multicard/2-cards/test_quantization.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe_routing_replay.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_performance.py` |
| `tests/e2e/multicard/2-cards/test_shared_expert_dp.py` |
| `tests/e2e/multicard/2-cards/test_single_request_aclgraph.py` |
| `tests/e2e/multicard/2-cards/test_sp_pass.py` |
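
The conversion is behavior-preserving: yapf's paren-aligned continuation indents become ruff's 4-space hanging indents, and expressions that fit within the line length are collapsed onto one line. A representative before/after, lifted from the diff excerpted at the end of this description:

```python
from vllm.config import CompilationConfig

# Before (yapf): continuation arguments aligned under the opening parenthesis.
compilation_config = CompilationConfig(cudagraph_mode="FULL_DECODE_ONLY",
                                       cudagraph_capture_sizes=[12])

# After (ruff format): collapsed onto one line, since it fits the configured line length.
compilation_config = CompilationConfig(cudagraph_mode="FULL_DECODE_ONLY", cudagraph_capture_sizes=[12])
```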

### Does this PR introduce _any_ user-facing change?

No. This batch only reformats test code; behavior is unchanged.
### How was this patch tested?

No functional change is intended; the touched files are e2e tests that continue to run in the existing CI.
- vLLM version: v0.15.0
- vLLM main: 9562912cea

Signed-off-by: MrZ20 <2609716663@qq.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>

The full commit touches 27 files, with 753 additions and 859 deletions. A representative excerpt:

```diff
@@ -18,15 +18,12 @@
 from __future__ import annotations
 import math
 import os
 import random
 from typing import Any, Union
 from unittest.mock import patch
 import pytest
 from transformers import AutoTokenizer
-from vllm import LLM, SamplingParams
+from vllm import SamplingParams
 from vllm.config import CompilationConfig
 from vllm.v1.metrics.reader import Counter, Vector
@@ -101,7 +98,8 @@ def test_eagle3_sp_acceptance(
             [prompt],
             tokenize=False,
             add_generation_prompt=True,
-        ) for prompt in prompts
+        )
+        for prompt in prompts
     ]
     speculative_config = {
@@ -112,21 +110,20 @@
         "model": spec_model_name,
     }
-    compilation_config = CompilationConfig(cudagraph_mode="FULL_DECODE_ONLY",
-                                           cudagraph_capture_sizes=[12])
+    compilation_config = CompilationConfig(cudagraph_mode="FULL_DECODE_ONLY", cudagraph_capture_sizes=[12])
     with VllmRunner(
-            main_model_name,
-            enforce_eager=True,
-            max_model_len=8192,
-            disable_log_stats=False,
-            tensor_parallel_size=2,
-            max_num_seqs=256,
-            distributed_executor_backend="mp",
-            gpu_memory_utilization=0.7,
-            speculative_config=speculative_config,
-            compilation_config=compilation_config,
-            async_scheduling=async_scheduling,
+        main_model_name,
+        enforce_eager=True,
+        max_model_len=8192,
+        disable_log_stats=False,
+        tensor_parallel_size=2,
+        max_num_seqs=256,
+        distributed_executor_backend="mp",
+        gpu_memory_utilization=0.7,
+        speculative_config=speculative_config,
+        compilation_config=compilation_config,
+        async_scheduling=async_scheduling,
     ) as llm:
         _ = llm.generate(prompts, sampling_params)
         metrics = llm.model.get_metrics()
@@ -142,10 +139,7 @@ def test_eagle3_sp_acceptance(
         for pos in range(len(metric.values)):
             num_accepted_tokens_per_pos[pos] += metric.values[pos]
-    acceptance_per_pos = [
-        num_accepted_tokens / num_drafts
-        for num_accepted_tokens in num_accepted_tokens_per_pos
-    ]
+    acceptance_per_pos = [num_accepted_tokens / num_drafts for num_accepted_tokens in num_accepted_tokens_per_pos]
     golden = BASELINES_SP[method]
     match = all(abs(a - b) < 0.06 for a, b in zip(acceptance_per_pos, golden))
```
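
For context on the final hunk: each position's acceptance rate is the count of accepted draft tokens at that position divided by the total number of drafts, and the check passes when every position is within 0.06 of its golden baseline. A minimal sketch of that arithmetic with made-up numbers (the real counts come from `llm.model.get_metrics()`, and the baselines from `BASELINES_SP`):

```python
# Illustrative values only; the test reads these from vLLM's metrics.
num_drafts = 100
num_accepted_tokens_per_pos = [81, 62, 40]  # accepted draft tokens at speculative positions 0..2

acceptance_per_pos = [n / num_drafts for n in num_accepted_tokens_per_pos]  # [0.81, 0.62, 0.40]
golden = [0.80, 0.60, 0.42]  # hypothetical baseline; the test uses BASELINES_SP[method]

# Same tolerance check as the test: pass only if every position is within 0.06.
match = all(abs(a - b) < 0.06 for a, b in zip(acceptance_per_pos, golden))
assert match
```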