[Lint] Style: Convert tests/ to ruff format (Batch #5) (#6747)

### What this PR does / why we need it?

This PR applies `ruff format` to the following test files:

| File Path |
| :--- |
| `tests/e2e/singlecard/compile/backend.py` |
| `tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py` |
| `tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py` |
| `tests/e2e/singlecard/compile/test_norm_quant_fusion.py` |
| `tests/e2e/singlecard/model_runner_v2/test_basic.py` |
| `tests/e2e/singlecard/test_aclgraph_accuracy.py` |
| `tests/e2e/singlecard/test_aclgraph_batch_invariant.py` |
| `tests/e2e/singlecard/test_aclgraph_mem.py` |
| `tests/e2e/singlecard/test_async_scheduling.py` |
| `tests/e2e/singlecard/test_auto_fit_max_mode_len.py` |
| `tests/e2e/singlecard/test_batch_invariant.py` |
| `tests/e2e/singlecard/test_camem.py` |
| `tests/e2e/singlecard/test_completion_with_prompt_embeds.py` |
| `tests/e2e/singlecard/test_cpu_offloading.py` |
| `tests/e2e/singlecard/test_guided_decoding.py` |
| `tests/e2e/singlecard/test_ilama_lora.py` |
| `tests/e2e/singlecard/test_llama32_lora.py` |
| `tests/e2e/singlecard/test_models.py` |
| `tests/e2e/singlecard/test_multistream_overlap_shared_expert.py` |
| `tests/e2e/singlecard/test_quantization.py` |
| `tests/e2e/singlecard/test_qwen3_multi_loras.py` |
| `tests/e2e/singlecard/test_sampler.py` |
| `tests/e2e/singlecard/test_vlm.py` |
| `tests/e2e/singlecard/test_xlite.py` |
| `tests/e2e/singlecard/utils.py` |

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 9562912cea

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
SILONG ZENG
2026-02-24 15:50:00 +08:00
committed by GitHub
parent 747484cb64
commit 62ea664aa7
26 changed files with 859 additions and 1052 deletions

View File

@@ -20,13 +20,14 @@ Compare the outputs of vLLM with and without xlite.
Run `pytest tests/e2e/singlecard/test_xlite.py`.
"""
# ruff: noqa: E501
import os
import pytest
from vllm import SamplingParams
from tests.e2e.singlecard.utils import (PROMPTS_SHORT, LLMTestCase,
gen_and_valid)
from tests.e2e.singlecard.utils import PROMPTS_SHORT, LLMTestCase, gen_and_valid
os.environ["VLLM_ASCEND_ENABLE_NZ"] = "2"
@@ -35,9 +36,9 @@ CASE_DECODE_ONLY = LLMTestCase(
prompts=PROMPTS_SHORT,
golden_answers=[
"Hello, my name is Lina. I'm a 22-year-old student from China.",
'The president of the United States is the same as the president of the United Nations. This is because the president',
'The capital of France is Paris. The capital of France is also the capital of the French Republic.',
'The future of AI is not just a technological challenge but a profound transformation of how we live, work'
"The president of the United States is the same as the president of the United Nations. This is because the president",
"The capital of France is Paris. The capital of France is also the capital of the French Republic.",
"The future of AI is not just a technological challenge but a profound transformation of how we live, work",
],
sampling_params=SamplingParams(
max_tokens=15,
@@ -45,19 +46,22 @@ CASE_DECODE_ONLY = LLMTestCase(
top_p=1.0,
top_k=0,
n=1,
))
),
)
CASE_FULL = LLMTestCase(
model="Qwen/Qwen3-0.6B",
prompts=[
"Hello, my name is", "The president of the United States is",
"The capital of France is", "The future of AI is"
"Hello, my name is",
"The president of the United States is",
"The capital of France is",
"The future of AI is",
],
golden_answers=[
" Lina. I'm a 22-year-old student from China. I'm interested in studying in the US. I'm looking for a job in the",
' the same as the president of the United Nations. This is because the president of the United States is the same as the president of the United Nations. The president',
' Paris. The capital of Italy is Rome. The capital of Spain is Madrid. The capital of China is Beijing. The capital of Japan is Tokyo. The capital',
" not just a technological challenge but a profound transformation of how we live, work, and interact with the world. As we stand at the intersection of artificial intelligence and"
" the same as the president of the United Nations. This is because the president of the United States is the same as the president of the United Nations. The president",
" Paris. The capital of Italy is Rome. The capital of Spain is Madrid. The capital of China is Beijing. The capital of Japan is Tokyo. The capital",
" not just a technological challenge but a profound transformation of how we live, work, and interact with the world. As we stand at the intersection of artificial intelligence and",
],
sampling_params=SamplingParams(
max_tokens=32,
@@ -65,27 +69,25 @@ CASE_FULL = LLMTestCase(
top_p=1.0,
top_k=0,
n=1,
))
),
)
@pytest.mark.skip(
reason="TODO: Re-enable xlite_decode_only e2e test when stable.")
@pytest.mark.skip(reason="TODO: Re-enable xlite_decode_only e2e test when stable.")
@pytest.mark.parametrize("cur_case", [CASE_DECODE_ONLY])
def test_models_with_xlite_decode_only(cur_case: LLMTestCase):
runner_kwargs = {
"model_name": cur_case.model,
"max_model_len": 1024,
"block_size": 128,
"additional_config": {
"xlite_graph_config": {
"enabled": True
}
},
"additional_config": {"xlite_graph_config": {"enabled": True}},
}
gen_and_valid(runner_kwargs=runner_kwargs,
prompts=cur_case.prompts,
sampling_params=cur_case.sampling_params,
golden_answers=cur_case.golden_answers)
gen_and_valid(
runner_kwargs=runner_kwargs,
prompts=cur_case.prompts,
sampling_params=cur_case.sampling_params,
golden_answers=cur_case.golden_answers,
)
@pytest.mark.parametrize("cur_case", [CASE_FULL])
@@ -94,14 +96,11 @@ def test_models_with_xlite_full_mode(cur_case: LLMTestCase):
"model_name": cur_case.model,
"max_model_len": 1024,
"block_size": 128,
"additional_config": {
"xlite_graph_config": {
"enabled": True,
"full_mode": True
}
},
"additional_config": {"xlite_graph_config": {"enabled": True, "full_mode": True}},
}
gen_and_valid(runner_kwargs=runner_kwargs,
prompts=cur_case.prompts,
sampling_params=cur_case.sampling_params,
golden_answers=cur_case.golden_answers)
gen_and_valid(
runner_kwargs=runner_kwargs,
prompts=cur_case.prompts,
sampling_params=cur_case.sampling_params,
golden_answers=cur_case.golden_answers,
)