### What this PR does / why we need it?
Follow the linting setup used by vllm-project/vllm:
https://github.com/vllm-project/vllm/blob/main/.pre-commit-config.yaml
Enable pre-commit to catch as many low-level errors as possible.
This PR is one step of #1241. Its purpose is to make the linting system
clearer and more convenient. This step mainly enables the following hooks:
yapf, actionlint, ruff, typos, isort, mypy, png-lint, signoff-commit,
enforce-import-regex-instead-of-re.
TODO:
- clang-format (check csrc against Google style)
the code needs to be cleaned up first; disabled for now
- pymarkdown
the code needs to be cleaned up first; disabled for now
- shellcheck
the code needs to be cleaned up first; disabled for now
### Does this PR introduce _any_ user-facing change?
Only developer UX change:
https://vllm-ascend--1256.org.readthedocs.build/en/1256/developer_guide/contributing.html#run-lint-locally
```
pip install -r requirements-lint.txt && pre-commit install
bash format.sh
```
### How was this patch tested?
CI passed with the newly added and existing tests.
Co-authored-by: Yikun [yikunkero@gmail.com](mailto:yikunkero@gmail.com)
Co-authored-by: wangli
[wangli858794774@gmail.com](mailto:wangli858794774@gmail.com)
- vLLM version: v0.9.1
- vLLM main:
5358cce5ff
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
import os
|
|
from argparse import ArgumentParser
|
|
|
|
import libcst as cst
|
|
import libcst.matchers as m
|
|
|
|
# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
|
|
|
|
|
|
# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
|
|
class StreamingFalseTransformer(cst.CSTTransformer):
    """Rewrite ``streaming=...`` to ``streaming=False`` in targeted calls.

    Only calls to the bare name ``load_dataset`` that appear inside a
    function named ``load_data`` within a class named ``HuggingFaceDataset``
    are rewritten. Calls that do not already pass a ``streaming`` keyword
    are left untouched.
    """

    def __init__(self):
        super().__init__()
        # True while the traversal is inside ``HuggingFaceDataset``.
        self.in_target_class = False
        # True while the traversal is inside its ``load_data`` function.
        self.in_target_func = False
        # Flag values of the enclosing scopes. They are restored on leave so
        # that leaving a nested class/function does not drop the outer scope.
        self._class_flag_stack = []
        self._func_flag_stack = []

    def visit_ClassDef(self, node):
        """Remember the current class flag and mark entry into the target class."""
        self._class_flag_stack.append(self.in_target_class)
        if node.name.value == "HuggingFaceDataset":
            self.in_target_class = True

    def leave_ClassDef(self, original_node, updated_node):
        """Restore the class flag that was active before this class was entered."""
        self.in_target_class = (
            self._class_flag_stack.pop() if self._class_flag_stack else False
        )
        return updated_node

    def visit_FunctionDef(self, node):
        """Remember the current function flag and mark entry into ``load_data``."""
        self._func_flag_stack.append(self.in_target_func)
        if self.in_target_class and node.name.value == "load_data":
            self.in_target_func = True

    def leave_FunctionDef(self, original_node, updated_node):
        """Restore the function flag that was active before this function was entered."""
        self.in_target_func = (
            self._func_flag_stack.pop() if self._func_flag_stack else False
        )
        return updated_node

    def leave_Call(self, original_node, updated_node):
        """Return the call with any ``streaming`` keyword forced to ``False``.

        Calls outside the target class/function, or calls whose callee is not
        the bare name ``load_dataset``, are returned unchanged.
        """
        if not (self.in_target_class and self.in_target_func):
            return updated_node
        if not m.matches(updated_node.func, m.Name("load_dataset")):
            return updated_node

        new_args = [
            arg.with_changes(value=cst.Name("False"))
            if arg.keyword and arg.keyword.value == "streaming"
            else arg
            for arg in updated_node.args
        ]
        return updated_node.with_changes(args=new_args)
|
|
|
|
|
|
def patch_file(path):
    """Apply ``StreamingFalseTransformer`` to the file at *path*, in place.

    The path is resolved to an absolute path first. If nothing exists there,
    a "File not found" message is printed and the function returns without
    touching anything. Otherwise the file is read as UTF-8, parsed with
    libcst, transformed, written back to the same location, and a "Patched"
    message is printed.
    """
    target = os.path.abspath(path)

    if os.path.exists(target):
        with open(target, "r", encoding="utf-8") as reader:
            original_code = reader.read()

        # Parse and transform in one pass; libcst preserves formatting.
        patched_tree = cst.parse_module(original_code).visit(
            StreamingFalseTransformer()
        )

        with open(target, "w", encoding="utf-8") as writer:
            writer.write(patched_tree.code)

        print(f"Patched: {target}")
    else:
        print(f"File not found: {target}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: patch the file given by --path.
    parser = ArgumentParser(
        description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
    )
    # Register --path exactly once; registering the same option string twice
    # makes argparse raise a conflicting-option ArgumentError at startup.
    parser.add_argument(
        "--path",
        type=str,
        default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
        help="Path to the benchmark_dataset.py file",
    )
    args = parser.parse_args()
    patch_file(args.path)
|