[Lint] Style: Convert test/ to ruff format (Batch #5) (#6747)

### What this PR does / why we need it?

Converts the following files under `tests/e2e/singlecard/` to ruff format, as batch #5 of the test-suite conversion:

| File Path |
| :--- |
| `tests/e2e/singlecard/compile/backend.py` |
| `tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py` |
| `tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py` |
| `tests/e2e/singlecard/compile/test_norm_quant_fusion.py` |
| `tests/e2e/singlecard/model_runner_v2/test_basic.py` |
| `tests/e2e/singlecard/test_aclgraph_accuracy.py` |
| `tests/e2e/singlecard/test_aclgraph_batch_invariant.py` |
| `tests/e2e/singlecard/test_aclgraph_mem.py` |
| `tests/e2e/singlecard/test_async_scheduling.py` |
| `tests/e2e/singlecard/test_auto_fit_max_mode_len.py` |
| `tests/e2e/singlecard/test_batch_invariant.py` |
| `tests/e2e/singlecard/test_camem.py` |
| `tests/e2e/singlecard/test_completion_with_prompt_embeds.py` |
| `tests/e2e/singlecard/test_cpu_offloading.py` |
| `tests/e2e/singlecard/test_guided_decoding.py` |
| `tests/e2e/singlecard/test_ilama_lora.py` |
| `tests/e2e/singlecard/test_llama32_lora.py` |
| `tests/e2e/singlecard/test_models.py` |
| `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py` |
| `tests/e2e/singlecard/test_quantization.py` |
| `tests/e2e/singlecard/test_qwen3_multi_loras.py` |
| `tests/e2e/singlecard/test_sampler.py` |
| `tests/e2e/singlecard/test_vlm.py` |
| `tests/e2e/singlecard/test_xlite.py` |
| `tests/e2e/singlecard/utils.py` |
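
To reproduce or verify a batch like this locally, the conversion boils down to running ruff's formatter over the directory. The sketch below is not part of this PR; it simply drives ruff's documented CLI from Python and assumes ruff is installed and configured by the repo's `pyproject.toml`:

```python
import subprocess

TARGET = "tests/e2e/singlecard"  # directory covered by this batch

# Rewrite files in place, then verify the tree is clean under --check
# (ruff exits non-zero if any file would still be reformatted).
subprocess.run(["ruff", "format", TARGET], check=True)
subprocess.run(["ruff", "format", "--check", TARGET], check=True)
```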

### Does this PR introduce _any_ user-facing change?

No. This PR only reformats test code; there is no functional change.

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 9562912cea

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
Author: SILONG ZENG
Date: 2026-02-24 15:50:00 +08:00
Committed by: GitHub
Parent: 747484cb64
Commit: 62ea664aa7
26 changed files with 859 additions and 1052 deletions


```diff
@@ -40,7 +40,6 @@ def test_aclgraph_mem_use(model: str, max_tokens: int) -> None:
     capture_mem_after = multiprocessing.Value("q", -1)  # long long
 
-
     def capture_model_wrapper(original_method):
         def wrapped(self):
             mem_before = torch.npu.mem_get_info()[0]  # free memory
             result = original_method(self)
```
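
For readers skimming the diff: the test replaces `NPUModelRunner.capture_model` with a wrapper that samples free device memory before and after the original call and reports the delta through a shared `multiprocessing.Value`, so the number survives the worker-process boundary. A minimal, hardware-free sketch of that pattern (`Runner` and `free_memory` are stand-ins for `NPUModelRunner` and `torch.npu.mem_get_info`, which need Ascend hardware):

```python
import multiprocessing
from unittest.mock import patch


class Runner:
    """Stand-in for NPUModelRunner; the real class needs Ascend hardware."""

    def capture_model(self):
        return "captured"


def free_memory() -> int:
    """Placeholder for torch.npu.mem_get_info()[0] (free device memory)."""
    return 1024


# "q" = C signed long long; a shared Value survives the spawn/fork boundary
# between the test process and the worker that runs capture_model.
mem_used = multiprocessing.Value("q", -1)


def capture_model_wrapper(original_method):
    def wrapped(self):
        before = free_memory()
        result = original_method(self)
        mem_used.value = before - free_memory()  # memory consumed by the call
        return result

    return wrapped


original = Runner.capture_model
with patch.object(Runner, "capture_model", new=capture_model_wrapper(original)):
    Runner().capture_model()  # wrapped: records the memory delta as a side effect
print(mem_used.value)  # 0 here, since free_memory() is a stub
```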
```diff
@@ -55,19 +54,16 @@ def test_aclgraph_mem_use(model: str, max_tokens: int) -> None:
 
     original_capture = NPUModelRunner.capture_model
-    with patch.object(NPUModelRunner,
-                      'capture_model',
-                      new=capture_model_wrapper(original_capture)):
+    with patch.object(NPUModelRunner, "capture_model", new=capture_model_wrapper(original_capture)):
         prompts = [
-            "Hello, my name is", "The president of the United States is",
-            "The capital of France is", "The future of AI is"
+            "Hello, my name is",
+            "The president of the United States is",
+            "The capital of France is",
+            "The future of AI is",
         ]
-        sampling_params = SamplingParams(max_tokens=max_tokens,
-                                         temperature=0.0)
+        sampling_params = SamplingParams(max_tokens=max_tokens, temperature=0.0)
         if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
-            vllm_model = VllmRunner(model,
-                                    max_model_len=1024,
-                                    quantization="ascend")
+            vllm_model = VllmRunner(model, max_model_len=1024, quantization="ascend")
         else:
             vllm_model = VllmRunner(model)
         _ = vllm_model.generate(prompts, sampling_params)
 
```
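
The `prompts` list above shows a ruff format behavior worth knowing when reviewing these batches: a collection that no longer fits on one line is exploded to one element per line and given a trailing comma, and that "magic" trailing comma then pins the exploded layout on every later run. A toy illustration (names are placeholders, not from this PR):

```python
packed = ["a", "b", "c"]  # fits within the line limit: stays on one line
exploded = [
    "a",
    "b",
    "c",
]  # the trailing comma keeps this one-per-line on subsequent format runs
```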
```diff
@@ -94,5 +90,6 @@ def test_aclgraph_mem_use(model: str, max_tokens: int) -> None:
     assert mem_used_by_capture < max_mem_expected, (
         f"capture_model used more memory than expected. "
         f"Used: {mem_used_by_capture / (1024**3):.2f} GiB, "
-        f"Expected: < {max_capture_mem_gib:.2f} GiB")
-    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = 'spawn'
+        f"Expected: < {max_capture_mem_gib:.2f} GiB"
+    )
+    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
```