[Lint]Style: Convert root, benchmarks, tools and docs to ruff format (#5843)
### What this PR does / why we need it?
This PR fixes linting issues in the root directory, `benchmarks/`, `tools/`, and `docs/` so that they conform to the project's Ruff configuration. It is part of a gradual effort to enable full linting coverage across the repository; the corresponding paths have been removed from the `exclude` list in `pyproject.toml`.
### Does this PR introduce _any_ user-facing change?
No. This is a formatting-only change.
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
```diff
@@ -1,5 +1,3 @@
-from typing import Tuple
-
 import numpy as np
 import pytest
 import torch
```
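The dropped `Tuple` import reflects PEP 585: since Python 3.9 the builtin containers are themselves generic, and Ruff's pyupgrade rules (UP006/UP035) rewrite the `typing` aliases accordingly. A minimal illustration (hypothetical function, not from this repo):

```python
# PEP 585: builtin generics replace typing.Tuple / typing.List in annotations.
def bounds(values: list[int]) -> tuple[int, int]:
    """Return (min, max) of a non-empty list of ints."""
    return min(values), max(values)


print(bounds([3, 1, 4]))  # -> (1, 4)
```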
```diff
@@ -47,20 +45,12 @@ def get_masked_input_and_mask_ref(
     num_org_vocab_padding: int,
     added_vocab_start_index: int,
     added_vocab_end_index: int,
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
     """Reference implementation for verification"""
     org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
-    added_vocab_mask = (input_ >= added_vocab_start_index) & (
-        input_ < added_vocab_end_index
-    )
-    added_offset = (
-        added_vocab_start_index
-        - (org_vocab_end_index - org_vocab_start_index)
-        - num_org_vocab_padding
-    )
-    valid_offset = (org_vocab_start_index * org_vocab_mask) + (
-        added_offset * added_vocab_mask
-    )
+    added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index)
+    added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
+    valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask)
     vocab_mask = org_vocab_mask | added_vocab_mask
     masked_input = vocab_mask * (input_ - valid_offset)
     return masked_input, ~vocab_mask
```
```diff
@@ -78,7 +68,7 @@ SEEDS = [0]
 @pytest.mark.parametrize("seed", SEEDS)
 @torch.inference_mode()
 def test_get_masked_input_and_mask(
-    shape: Tuple[int, ...],
+    shape: tuple[int, ...],
     dtype: torch.dtype,
     device: str,
     seed: int,
```
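For context on what this test checks: the reference helper remaps token IDs from the original and the added vocabulary ranges into one contiguous index space and flags everything else as invalid. Below is a self-contained sketch of the logic visible in the hunks above; the leading `input_` and org-vocab parameters are inferred from the function body, since the diff shows only the tail of the signature, and the example values are made up.

```python
import torch


def get_masked_input_and_mask_ref(
    input_: torch.Tensor,
    org_vocab_start_index: int,  # inferred from the body shown in the diff
    org_vocab_end_index: int,  # inferred from the body shown in the diff
    num_org_vocab_padding: int,
    added_vocab_start_index: int,
    added_vocab_end_index: int,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Reference implementation for verification"""
    org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
    added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index)
    added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
    valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask)
    vocab_mask = org_vocab_mask | added_vocab_mask
    masked_input = vocab_mask * (input_ - valid_offset)
    return masked_input, ~vocab_mask


# Toy example: org vocab is [0, 4), 2 padding slots, added vocab is [8, 10).
ids = torch.tensor([0, 3, 5, 8, 9])
masked, invalid = get_masked_input_and_mask_ref(ids, 0, 4, 2, 8, 10)
print(masked)   # expected: tensor([0, 3, 0, 6, 7])
print(invalid)  # expected: tensor([False, False,  True, False, False])
```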
```diff
@@ -59,9 +59,7 @@ def results_to_json(latency, throughput, serving):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Process the results of the benchmark tests."
-    )
+    parser = argparse.ArgumentParser(description="Process the results of the benchmark tests.")
     parser.add_argument(
         "--results_folder",
         type=str,
```
```diff
@@ -80,12 +78,8 @@ if __name__ == "__main__":
         default="./perf_result_template.md",
         help="The template file for the markdown report.",
     )
-    parser.add_argument(
-        "--tag", default="main", help="Tag to be used for release message."
-    )
-    parser.add_argument(
-        "--commit_id", default="", help="Commit ID to be used for release message."
-    )
+    parser.add_argument("--tag", default="main", help="Tag to be used for release message.")
+    parser.add_argument("--commit_id", default="", help="Commit ID to be used for release message.")

     args = parser.parse_args()
     results_folder = (CUR_PATH / args.results_folder).resolve()
```
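The collapsing in these hunks is standard ruff format behavior: like Black, it joins a call onto one line when it fits within the configured line length, unless the call ends with a magic trailing comma. The joined lines here exceed 88 characters, which suggests this project configures a `line-length` larger than Ruff's default; treat that as an inference, not something stated in this diff. A small sketch of the rule (hypothetical arguments):

```python
import argparse

parser = argparse.ArgumentParser(description="Process the results of the benchmark tests.")

# No comma after the last argument: ruff format collapses the call onto one line.
parser.add_argument("--tag", default="main", help="Tag to be used for release message.")

# A trailing comma after the final argument pins the call in expanded form.
parser.add_argument(
    "--results_folder",
    type=str,
)
```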
```diff
@@ -116,9 +110,7 @@ if __name__ == "__main__":
         # get different percentiles
         for perc in [10, 25, 50, 75, 90, 99]:
             # Multiply 1000 to convert the time unit from s to ms
-            raw_result.update(
-                {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
-            )
+            raw_result.update({f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
         raw_result["avg_latency"] = raw_result["avg_latency"] * 1000

         # add the result to raw_result
```
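To make the unit handling above concrete, here is a toy run of the same loop with made-up numbers: per-percentile latencies in seconds from the raw benchmark JSON become millisecond values under `P<perc>` keys, and the average latency is scaled once afterwards.

```python
# Hypothetical raw benchmark entry; latencies are in seconds.
raw_result = {"avg_latency": 0.042, "percentiles": {"50": 0.040, "99": 0.085}}

for perc in [50, 99]:
    # Multiply by 1000 to convert the time unit from s to ms.
    raw_result.update({f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
raw_result["avg_latency"] = raw_result["avg_latency"] * 1000

print(raw_result["P50"], raw_result["P99"], raw_result["avg_latency"])  # 40.0 85.0 42.0
```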
```diff
@@ -142,38 +134,24 @@ if __name__ == "__main__":
     serving_results = pd.DataFrame.from_dict(serving_results)
     throughput_results = pd.DataFrame.from_dict(throughput_results)

-    raw_results_json = results_to_json(
-        latency_results, throughput_results, serving_results
-    )
+    raw_results_json = results_to_json(latency_results, throughput_results, serving_results)

     # remapping the key, for visualization purpose
     if not latency_results.empty:
-        latency_results = latency_results[list(latency_column_mapping.keys())].rename(
-            columns=latency_column_mapping
-        )
+        latency_results = latency_results[list(latency_column_mapping.keys())].rename(columns=latency_column_mapping)
     if not serving_results.empty:
-        serving_results = serving_results[list(serving_column_mapping.keys())].rename(
-            columns=serving_column_mapping
-        )
+        serving_results = serving_results[list(serving_column_mapping.keys())].rename(columns=serving_column_mapping)
     if not throughput_results.empty:
-        throughput_results = throughput_results[
-            list(throughput_results_column_mapping.keys())
-        ].rename(columns=throughput_results_column_mapping)
+        throughput_results = throughput_results[list(throughput_results_column_mapping.keys())].rename(
+            columns=throughput_results_column_mapping
+        )

-    processed_results_json = results_to_json(
-        latency_results, throughput_results, serving_results
-    )
+    processed_results_json = results_to_json(latency_results, throughput_results, serving_results)

     # get markdown tables
-    latency_md_table = tabulate(
-        latency_results, headers="keys", tablefmt="pipe", showindex=False
-    )
-    serving_md_table = tabulate(
-        serving_results, headers="keys", tablefmt="pipe", showindex=False
-    )
-    throughput_md_table = tabulate(
-        throughput_results, headers="keys", tablefmt="pipe", showindex=False
-    )
+    latency_md_table = tabulate(latency_results, headers="keys", tablefmt="pipe", showindex=False)
+    serving_md_table = tabulate(serving_results, headers="keys", tablefmt="pipe", showindex=False)
+    throughput_md_table = tabulate(throughput_results, headers="keys", tablefmt="pipe", showindex=False)

     # document the result
     print(output_folder)
```
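Finally, for readers unfamiliar with the reporting step: `tabulate(..., tablefmt="pipe")` renders a DataFrame as a GitHub-flavored markdown table, which is what gets spliced into the report template. A minimal sketch with invented column names and values (the real columns come from `serving_column_mapping`):

```python
import pandas as pd
from tabulate import tabulate

# Hypothetical serving results with made-up metrics.
serving_results = pd.DataFrame({"Test name": ["llama-8b-tp1"], "Mean TTFT (ms)": [42.1]})
serving_md_table = tabulate(serving_results, headers="keys", tablefmt="pipe", showindex=False)
print(serving_md_table)
# Prints a markdown table, roughly:
# | Test name    |   Mean TTFT (ms) |
# |:-------------|-----------------:|
# | llama-8b-tp1 |             42.1 |
```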