[Lint] Style: Convert root, benchmarks, tools and docs to ruff format (#5843)

### What this PR does / why we need it?
This PR fixes lint and formatting issues in the root directory, `benchmarks/`, `tools/`
and `docs/` so that these paths conform to the project's Ruff configuration.

This is part of a gradual effort to enable full linting coverage across
the repository. The corresponding paths have been removed from the
exclude list in pyproject.toml.
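
For context, the shape of the `pyproject.toml` change is roughly the following sketch; the actual exclude list is not shown in this PR, so the remaining entries here are hypothetical placeholders:

```toml
[tool.ruff]
# Before this PR, the paths it reformats were opted out of linting here.
# The entries below are illustrative, not the repository's actual list.
exclude = [
    # "benchmarks/**",  # removed by this PR
    # "tools/**",       # removed by this PR
    # "docs/**",        # removed by this PR
    "csrc/**",          # hypothetical: a path still awaiting conversion
]
```

With those paths un-excluded, `ruff format <path>` and `ruff check <path>` cover them both locally and in CI.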

### Does this PR introduce _any_ user-facing change?

No. This is a formatting-only change with no functional impact.
### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef

---------

Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Authored by SILONG ZENG on 2026-01-13 15:29:34 +08:00; committed by GitHub.
parent 4b679984de, commit 523e83016b
14 changed files with 425 additions and 531 deletions

First changed file (a test of `get_masked_input_and_mask` against a reference implementation; the file name is not shown in this extract). The `Tuple` → `tuple` changes are PEP 585 builtin-generic upgrades; the rest is line re-wrapping:

```diff
@@ -1,5 +1,3 @@
-from typing import Tuple
-
 import numpy as np
 import pytest
 import torch
@@ -47,20 +45,12 @@ def get_masked_input_and_mask_ref(
     num_org_vocab_padding: int,
     added_vocab_start_index: int,
     added_vocab_end_index: int,
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
     """Reference implementation for verification"""
     org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
-    added_vocab_mask = (input_ >= added_vocab_start_index) & (
-        input_ < added_vocab_end_index
-    )
-    added_offset = (
-        added_vocab_start_index
-        - (org_vocab_end_index - org_vocab_start_index)
-        - num_org_vocab_padding
-    )
-    valid_offset = (org_vocab_start_index * org_vocab_mask) + (
-        added_offset * added_vocab_mask
-    )
+    added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index)
+    added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
+    valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask)
     vocab_mask = org_vocab_mask | added_vocab_mask
     masked_input = vocab_mask * (input_ - valid_offset)
     return masked_input, ~vocab_mask
@@ -78,7 +68,7 @@ SEEDS = [0]
 @pytest.mark.parametrize("seed", SEEDS)
 @torch.inference_mode()
 def test_get_masked_input_and_mask(
-    shape: Tuple[int, ...],
+    shape: tuple[int, ...],
     dtype: torch.dtype,
     device: str,
     seed: int,
```

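To make the reference implementation concrete, here is a standalone sketch that runs it on a toy vocabulary shard. The leading parameters (`input_` and the org-vocab bounds) are cut off by the hunk, so their names and order are an assumption based on the visible body:

```python
import torch


def get_masked_input_and_mask_ref(
    input_: torch.Tensor,          # assumed leading parameters; only the
    org_vocab_start_index: int,    # trailing ones are visible in the diff
    org_vocab_end_index: int,
    num_org_vocab_padding: int,
    added_vocab_start_index: int,
    added_vocab_end_index: int,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Reference implementation for verification (body copied from the diff)."""
    org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
    added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index)
    added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
    valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask)
    vocab_mask = org_vocab_mask | added_vocab_mask
    masked_input = vocab_mask * (input_ - valid_offset)
    return masked_input, ~vocab_mask


# Toy shard: original vocab ids [0, 4), one padding slot, then added vocab ids [8, 10).
ids = torch.tensor([0, 3, 5, 8, 9])
masked, invalid = get_masked_input_and_mask_ref(ids, 0, 4, 1, 8, 10)
print(masked)   # tensor([0, 3, 0, 5, 6])  -- added ids map to slots after org vocab + padding
print(invalid)  # tensor([False, False,  True, False, False])  -- id 5 is in neither range
```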
Second changed file (the benchmark results post-processing script; file name not shown in this extract). The changes collapse calls that were wrapped for a shorter line limit into single lines under the new Ruff line length:

```diff
@@ -59,9 +59,7 @@ def results_to_json(latency, throughput, serving):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Process the results of the benchmark tests."
-    )
+    parser = argparse.ArgumentParser(description="Process the results of the benchmark tests.")
     parser.add_argument(
         "--results_folder",
         type=str,
@@ -80,12 +78,8 @@ if __name__ == "__main__":
         default="./perf_result_template.md",
         help="The template file for the markdown report.",
     )
-    parser.add_argument(
-        "--tag", default="main", help="Tag to be used for release message."
-    )
-    parser.add_argument(
-        "--commit_id", default="", help="Commit ID to be used for release message."
-    )
+    parser.add_argument("--tag", default="main", help="Tag to be used for release message.")
+    parser.add_argument("--commit_id", default="", help="Commit ID to be used for release message.")
     args = parser.parse_args()
 
     results_folder = (CUR_PATH / args.results_folder).resolve()
@@ -116,9 +110,7 @@ if __name__ == "__main__":
             # get different percentiles
             for perc in [10, 25, 50, 75, 90, 99]:
                 # Multiply 1000 to convert the time unit from s to ms
-                raw_result.update(
-                    {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
-                )
+                raw_result.update({f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
             raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
 
             # add the result to raw_result
@@ -142,38 +134,24 @@ if __name__ == "__main__":
     serving_results = pd.DataFrame.from_dict(serving_results)
     throughput_results = pd.DataFrame.from_dict(throughput_results)
 
-    raw_results_json = results_to_json(
-        latency_results, throughput_results, serving_results
-    )
+    raw_results_json = results_to_json(latency_results, throughput_results, serving_results)
 
     # remapping the key, for visualization purpose
     if not latency_results.empty:
-        latency_results = latency_results[list(latency_column_mapping.keys())].rename(
-            columns=latency_column_mapping
-        )
+        latency_results = latency_results[list(latency_column_mapping.keys())].rename(columns=latency_column_mapping)
     if not serving_results.empty:
-        serving_results = serving_results[list(serving_column_mapping.keys())].rename(
-            columns=serving_column_mapping
-        )
+        serving_results = serving_results[list(serving_column_mapping.keys())].rename(columns=serving_column_mapping)
     if not throughput_results.empty:
-        throughput_results = throughput_results[
-            list(throughput_results_column_mapping.keys())
-        ].rename(columns=throughput_results_column_mapping)
+        throughput_results = throughput_results[list(throughput_results_column_mapping.keys())].rename(
+            columns=throughput_results_column_mapping
+        )
 
-    processed_results_json = results_to_json(
-        latency_results, throughput_results, serving_results
-    )
+    processed_results_json = results_to_json(latency_results, throughput_results, serving_results)
 
     # get markdown tables
-    latency_md_table = tabulate(
-        latency_results, headers="keys", tablefmt="pipe", showindex=False
-    )
-    serving_md_table = tabulate(
-        serving_results, headers="keys", tablefmt="pipe", showindex=False
-    )
-    throughput_md_table = tabulate(
-        throughput_results, headers="keys", tablefmt="pipe", showindex=False
-    )
+    latency_md_table = tabulate(latency_results, headers="keys", tablefmt="pipe", showindex=False)
+    serving_md_table = tabulate(serving_results, headers="keys", tablefmt="pipe", showindex=False)
+    throughput_md_table = tabulate(throughput_results, headers="keys", tablefmt="pipe", showindex=False)
 
     # document the result
     print(output_folder)
```
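
For readers unfamiliar with the pattern being reformatted here: the script selects and renames DataFrame columns via a mapping, then renders a GitHub-flavored pipe table with `tabulate`. A minimal self-contained sketch, with made-up column names and data (the real `*_column_mapping` dicts are defined earlier in the script and not shown in this diff):

```python
import pandas as pd
from tabulate import tabulate

# Hypothetical stand-in for the script's latency_column_mapping dict.
latency_column_mapping = {"test_name": "Test name", "avg_latency": "Mean latency (ms)"}

latency_results = pd.DataFrame(
    {"test_name": ["llama_tp1"], "avg_latency": [123.4], "extra": ["dropped"]}
)

# Keep only the mapped columns, then rename them for display.
latency_results = latency_results[list(latency_column_mapping.keys())].rename(
    columns=latency_column_mapping
)

# tablefmt="pipe" produces a GitHub-flavored markdown table; prints roughly:
# | Test name   |   Mean latency (ms) |
# |:------------|--------------------:|
# | llama_tp1   |               123.4 |
print(tabulate(latency_results, headers="keys", tablefmt="pipe", showindex=False))
```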