[Lint] Style: Convert tests/ to ruff format (Batch #5) (#6747)

### What this PR does / why we need it?

Converts the following test files under `tests/e2e/singlecard/` to ruff formatting (batch #5 of the conversion); an illustrative sketch of the style change follows the file list, and a real diff excerpt appears at the end of this page.

| File Path |
| :--- |
| `tests/e2e/singlecard/compile/backend.py` |
| `tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py` |
| `tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py` |
| `tests/e2e/singlecard/compile/test_norm_quant_fusion.py` |
| `tests/e2e/singlecard/model_runner_v2/test_basic.py` |
| `tests/e2e/singlecard/test_aclgraph_accuracy.py` |
| `tests/e2e/singlecard/test_aclgraph_batch_invariant.py` |
| `tests/e2e/singlecard/test_aclgraph_mem.py` |
| `tests/e2e/singlecard/test_async_scheduling.py` |
| `tests/e2e/singlecard/test_auto_fit_max_mode_len.py` |
| `tests/e2e/singlecard/test_batch_invariant.py` |
| `tests/e2e/singlecard/test_camem.py` |
| `tests/e2e/singlecard/test_completion_with_prompt_embeds.py` |
| `tests/e2e/singlecard/test_cpu_offloading.py` |
| `tests/e2e/singlecard/test_guided_decoding.py` |
| `tests/e2e/singlecard/test_ilama_lora.py` |
| `tests/e2e/singlecard/test_llama32_lora.py` |
| `tests/e2e/singlecard/test_models.py` |
| `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py` |
| `tests/e2e/singlecard/test_quantization.py` |
| `tests/e2e/singlecard/test_qwen3_multi_loras.py` |
| `tests/e2e/singlecard/test_sampler.py` |
| `tests/e2e/singlecard/test_vlm.py` |
| `tests/e2e/singlecard/test_xlite.py` |
| `tests/e2e/singlecard/utils.py` |
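
The sketch below is a hypothetical minimal example (not copied from any single listed file) of what this conversion looks like: ruff's formatter places one collection element per line with a trailing comma and uses a standard 4-space hanging indent, where the previous yapf-style layout packed several elements per line and used deeper continuation indents. The same pattern is visible in the diff excerpt at the end of this page.

```python
# Hypothetical sketch of the style change applied in this batch
# (illustrative only; not taken verbatim from the repository).

# Before (yapf-style layout): multiple items per line, no trailing comma.
prompts = [
    "Hello, my name is", "The capital of France is"
]

# After (ruff format): one item per line, trailing comma, 4-space indent.
prompts = [
    "Hello, my name is",
    "The capital of France is",
]
```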

### Does this PR introduce _any_ user-facing change?

No. This is a formatting-only change to test files; no functional behavior changes.

### How was this patch tested?
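Not stated in the PR. As a hedged sketch only, a formatting-only batch like this could be verified locally by running ruff's formatter in check mode over the touched directory (this assumes `ruff` is installed and uses the repository's existing configuration; it is not a record of what the author actually ran):

```python
# Illustrative verification sketch, not the author's actual test procedure.
# `ruff format --check` exits non-zero if any file would still be reformatted.
import subprocess
import sys

result = subprocess.run(
    ["ruff", "format", "--check", "tests/e2e/singlecard/"],
    capture_output=True,
    text=True,
)
print(result.stdout or result.stderr, end="")
sys.exit(result.returncode)
```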

- vLLM version: v0.15.0
- vLLM main: 9562912cea

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
Author: SILONG ZENG
Committed: 2026-02-24 15:50:00 +08:00 (committed by GitHub)
Parent: 747484cb64
Commit: 62ea664aa7
26 changed files with 859 additions and 1052 deletions

Diff excerpt from `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py`:

@@ -39,59 +39,56 @@ def test_models_with_multistream_overlap_shared_expert(
     max_tokens: int,
 ) -> None:
     prompts = [
-        "Hello, my name is", "The president of the United States is",
-        "The capital of France is", "The future of AI is"
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
     ]
     sampling_params = SamplingParams(max_tokens=max_tokens, temperature=0.0)
     with VllmRunner(
-            model,
-            max_model_len=1024,
-            enforce_eager=True,
-            cudagraph_capture_sizes=[4, 8, 16, 32],
-            additional_config={
-                "multistream_overlap_shared_expert": True,
-            },
-            quantization="ascend",
+        model,
+        max_model_len=1024,
+        enforce_eager=True,
+        cudagraph_capture_sizes=[4, 8, 16, 32],
+        additional_config={
+            "multistream_overlap_shared_expert": True,
+        },
+        quantization="ascend",
     ) as runner:
-        vllm_moe_ms_eager_outputs = runner.model.generate(
-            prompts, sampling_params)
+        vllm_moe_ms_eager_outputs = runner.model.generate(prompts, sampling_params)
     with VllmRunner(
-            model,
-            max_model_len=1024,
-            cudagraph_capture_sizes=[4, 8, 16, 32],
-            additional_config={
-                "multistream_overlap_shared_expert": True,
-            },
-            quantization="ascend",
+        model,
+        max_model_len=1024,
+        cudagraph_capture_sizes=[4, 8, 16, 32],
+        additional_config={
+            "multistream_overlap_shared_expert": True,
+        },
+        quantization="ascend",
     ) as runner:
-        vllm_moe_ms_aclgraph_outputs = runner.model.generate(
-            prompts, sampling_params)
+        vllm_moe_ms_aclgraph_outputs = runner.model.generate(prompts, sampling_params)
     with VllmRunner(
-            model,
-            max_model_len=1024,
-            enforce_eager=True,
-            cudagraph_capture_sizes=[4, 8, 16, 32],
-            quantization="ascend",
+        model,
+        max_model_len=1024,
+        enforce_eager=True,
+        cudagraph_capture_sizes=[4, 8, 16, 32],
+        quantization="ascend",
     ) as runner:
         vllm_eager_outputs = runner.model.generate(prompts, sampling_params)
     vllm_moe_ms_eager_outputs_list = []
     for output in vllm_moe_ms_eager_outputs:
-        vllm_moe_ms_eager_outputs_list.append(
-            (output.outputs[0].index, output.outputs[0].text))
+        vllm_moe_ms_eager_outputs_list.append((output.outputs[0].index, output.outputs[0].text))
     vllm_moe_ms_aclgraph_outputs_list = []
     for output in vllm_moe_ms_aclgraph_outputs:
-        vllm_moe_ms_aclgraph_outputs_list.append(
-            (output.outputs[0].index, output.outputs[0].text))
+        vllm_moe_ms_aclgraph_outputs_list.append((output.outputs[0].index, output.outputs[0].text))
     vllm_eager_outputs_list = []
     for output in vllm_eager_outputs:
-        vllm_eager_outputs_list.append(
-            (output.outputs[0].index, output.outputs[0].text))
+        vllm_eager_outputs_list.append((output.outputs[0].index, output.outputs[0].text))
     check_outputs_equal(
         outputs_0_lst=vllm_eager_outputs_list,