[Lint] Style: Convert tests/ to ruff format (Batch #1) (#6738)

### What this PR does / why we need it?
**Scope of Changes**:
| File Path |
| :--- |
| `tests/e2e/310p/multicard/test_vl_model_multicard.py` |
| `tests/e2e/310p/singlecard/test_vl_model_singlecard.py` |
| `tests/e2e/310p/test_utils.py` |
| `tests/e2e/conftest.py` |
| `tests/e2e/model_utils.py` |
| `tests/e2e/models/conftest.py` |
| `tests/e2e/models/test_lm_eval_correctness.py` |
| `tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py` |
| `tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py` |
| `tests/e2e/multicard/2-cards/test_data_parallel.py` |
| `tests/e2e/multicard/2-cards/test_disaggregated_encoder.py` |
| `tests/e2e/multicard/2-cards/test_expert_parallel.py` |
| `tests/e2e/multicard/2-cards/test_external_launcher.py` |
| `tests/e2e/multicard/2-cards/test_full_graph_mode.py` |
| `tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py` |
| `tests/e2e/multicard/2-cards/test_offline_inference_distributed.py` |
| `tests/e2e/multicard/2-cards/test_offline_weight_load.py` |
| `tests/e2e/multicard/2-cards/test_pipeline_parallel.py` |
| `tests/e2e/multicard/2-cards/test_prefix_caching.py` |
| `tests/e2e/multicard/2-cards/test_quantization.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe_routing_replay.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_performance.py` |
| `tests/e2e/multicard/2-cards/test_shared_expert_dp.py` |
| `tests/e2e/multicard/2-cards/test_single_request_aclgraph.py` |
| `tests/e2e/multicard/2-cards/test_sp_pass.py` |
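
The conversion is mechanical and reproducible. Below is a minimal sketch of how the batch could be re-run or verified locally, assuming `ruff` is installed and picks up its settings from the repository's `pyproject.toml` (the exact invocation used by the author/CI is an assumption here, not stated in the PR):

```python
# Minimal reproduction sketch (assumption: ruff is installed and configured
# by the repository's pyproject.toml; the target path matches the batch above).
import subprocess

# Rewrite the files in place with the formatter.
subprocess.run(["ruff", "format", "tests/e2e/"], check=True)

# "--check" only reports files that would be reformatted, without touching
# them, so it doubles as a verification step after the conversion.
subprocess.run(["ruff", "format", "--check", "tests/e2e/"], check=True)
```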

### Does this PR introduce _any_ user-facing change?

No. This is a formatting-only change; no functional behavior is modified.

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 9562912cea

Signed-off-by: MrZ20 <2609716663@qq.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
SILONG ZENG authored on 2026-03-10 09:52:50 +08:00, committed by GitHub
parent 9216e1b050, commit 43df2cb2fc
27 changed files with 753 additions and 859 deletions

Diff excerpt from `tests/e2e/multicard/2-cards/test_shared_expert_dp.py`:

```diff
@@ -13,69 +13,65 @@ MODELS = [
 @pytest.mark.parametrize("model", MODELS)
 def test_deepseek_v2_lite_enable_shared_expert_dp_tp2(model: str) -> None:
-    if 'HCCL_OP_EXPANSION_MODE' in os.environ:
-        del os.environ['HCCL_OP_EXPANSION_MODE']
+    if "HCCL_OP_EXPANSION_MODE" in os.environ:
+        del os.environ["HCCL_OP_EXPANSION_MODE"]
     prompts = [
-        "Hello, my name is", "The capital of the United States is",
-        "The capital of France is", "The future of AI is"
+        "Hello, my name is",
+        "The capital of the United States is",
+        "The capital of France is",
+        "The future of AI is",
     ]
     sampling_params = SamplingParams(max_tokens=32, temperature=0.0)
     with VllmRunner(
-            model,
-            max_model_len=1024,
-            enforce_eager=True,
-            tensor_parallel_size=2,
-            enable_expert_parallel=True,
+        model,
+        max_model_len=1024,
+        enforce_eager=True,
+        tensor_parallel_size=2,
+        enable_expert_parallel=True,
     ) as runner:
         vllm_eager_outputs = runner.model.generate(prompts, sampling_params)
     os.environ["VLLM_ASCEND_ENABLE_FLASHCOMM1"] = "1"
     with VllmRunner(
-            model,
-            max_model_len=1024,
-            enforce_eager=True,
-            tensor_parallel_size=2,
-            enable_expert_parallel=True,
-            additional_config={
-                "enable_shared_expert_dp": True,
-            },
+        model,
+        max_model_len=1024,
+        enforce_eager=True,
+        tensor_parallel_size=2,
+        enable_expert_parallel=True,
+        additional_config={
+            "enable_shared_expert_dp": True,
+        },
     ) as runner:
-        shared_expert_dp_eager_outputs = runner.model.generate(
-            prompts, sampling_params)
+        shared_expert_dp_eager_outputs = runner.model.generate(prompts, sampling_params)
     with VllmRunner(
-            model,
-            max_model_len=1024,
-            tensor_parallel_size=2,
-            enable_expert_parallel=True,
-            compilation_config={
-                "cudagraph_capture_sizes": [1, 4, 8, 16],
-                "cudagraph_mode": "FULL_DECODE_ONLY",
-            },
-            additional_config={
-                "enable_shared_expert_dp": True,
-            },
+        model,
+        max_model_len=1024,
+        tensor_parallel_size=2,
+        enable_expert_parallel=True,
+        compilation_config={
+            "cudagraph_capture_sizes": [1, 4, 8, 16],
+            "cudagraph_mode": "FULL_DECODE_ONLY",
+        },
+        additional_config={
+            "enable_shared_expert_dp": True,
+        },
     ) as runner:
-        shared_expert_dp_aclgraph_outputs = runner.model.generate(
-            prompts, sampling_params)
+        shared_expert_dp_aclgraph_outputs = runner.model.generate(prompts, sampling_params)
     vllm_eager_outputs_list = []
     for output in vllm_eager_outputs:
-        vllm_eager_outputs_list.append(
-            (output.outputs[0].index, output.outputs[0].text))
+        vllm_eager_outputs_list.append((output.outputs[0].index, output.outputs[0].text))
     shared_expert_dp_eager_outputs_list = []
     for output in shared_expert_dp_eager_outputs:
-        shared_expert_dp_eager_outputs_list.append(
-            (output.outputs[0].index, output.outputs[0].text))
+        shared_expert_dp_eager_outputs_list.append((output.outputs[0].index, output.outputs[0].text))
     shared_expert_dp_aclgraph_outputs_list = []
     for output in shared_expert_dp_aclgraph_outputs:
-        shared_expert_dp_aclgraph_outputs_list.append(
-            (output.outputs[0].index, output.outputs[0].text))
+        shared_expert_dp_aclgraph_outputs_list.append((output.outputs[0].index, output.outputs[0].text))
     check_outputs_equal(
         outputs_0_lst=vllm_eager_outputs_list,
```
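
The hunk above is representative of the whole batch: string literals move to double quotes, multi-line literals with a trailing comma are exploded one element per line, and call arguments drop yapf's extra continuation indent. A condensed illustration of the same rules on a hypothetical snippet (not taken from the diff):

```python
# Before (yapf-era style): single quotes, elements packed onto shared lines.
items = [
    'alpha', 'beta',
    'gamma',
]

# After (ruff format): double quotes, and the magic trailing comma keeps the
# literal multi-line with exactly one element per line.
items = [
    "alpha",
    "beta",
    "gamma",
]
```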