[Lint] Add lint hooks for clang-format, shellcheck, forbidden imports, and boolean context manager checks (#7511)

### What this PR does / why we need it?
This PR introduces several upstream-`vllm`-aligned lint hooks into
`vllm-ascend` and wires them into the `pre-commit` flow.

Main changes in this PR:
- add `check-boolean-context-manager` to catch boolean expressions in
`with` statements (see the sketch after this list)
- add `check-forbidden-imports` to forbid direct `re` imports (use
`regex as re` instead) and direct `triton` imports (use
`vllm.triton_utils` instead)
- enable shell script linting through `tools/shellcheck.sh`
- add a root `.clang-format` aligned with upstream `vllm`, enable
`clang-format` in `pre-commit`, and temporarily **exclude all `csrc/**`**
from `clang-format` to avoid pulling a large native-code reformat into
this PR
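
To make the boolean-context-manager check concrete: `and`/`or` between
context managers in a `with` statement builds a boolean expression and
enters only its result. A minimal sketch of the pitfall, using
hypothetical file names rather than code from this PR:

```python
# Buggy: the `and` expression evaluates to the second file object, so
# only "b.txt" is entered as a context manager; "a.txt" is opened but
# never deterministically closed.
with open("a.txt") and open("b.txt") as f:
    data = f.read()

# Intended: separate the managers with a comma so both are entered.
with open("a.txt") as a, open("b.txt") as b:
    data = a.read() + b.read()
```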

This PR focuses on landing the smaller, immediately useful lint
alignment first, without mixing in the larger requirements-management
migration.

### Does this PR introduce _any_ user-facing change?
No.

This PR only updates repository lint configuration, static checks, and
internal import/style enforcement. It does not change runtime behavior
or public interfaces.

### How was this patch tested?
Tested locally in the project virtual environment.

Command used:
```bash
bash format.sh
```
Verified checks passed:
```text
ruff check...............................................................Passed
ruff format..............................................................Passed
codespell................................................................Passed
typos....................................................................Passed
clang-format.............................................................Passed
Lint GitHub Actions workflow files.......................................Passed
Lint shell scripts.......................................................Passed
Lint PNG exports from excalidraw.........................................Passed
Check for spaces in all filenames........................................Passed
Enforce __init__.py in Python packages...................................Passed
Check for forbidden imports..............................................Passed
Check for boolean ops in with-statements.................................Passed
Suggestion...............................................................Passed
- hook id: suggestion
- duration: 0s

To bypass pre-commit hooks, add --no-verify to git commit.
```
**Note:**
`clang-format` is enabled but currently excludes all `csrc/**`; the
config sketch below shows how such an exclusion can be expressed.
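
A minimal sketch of how such an exclusion is typically written in
`.pre-commit-config.yaml`; the mirror repo, version pin, and file filter
below are illustrative assumptions, not the exact entry from this PR:

```yaml
- repo: https://github.com/pre-commit/mirrors-clang-format
  rev: v19.1.7                  # illustrative pin
  hooks:
    - id: clang-format
      types_or: [c, c++, cuda]  # illustrative file filter
      exclude: ^csrc/           # temporary: skip all native sources
```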


- vLLM version: v0.17.0
- vLLM main: 8b6325758c

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
Author: SILONG ZENG
Date: 2026-03-24 20:03:01 +08:00
Committed by: GitHub
Parent: d1a83a72f7
Commit: 1e3c1e76bf
24 changed files with 262 additions and 134 deletions


```diff
@@ -1,9 +1,9 @@
 # Standard
 import json
 import os
-import re
 from dataclasses import dataclass
+import regex as re
 import torch
 # Third Party
```
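
The `regex as re` swap repeated in the hunks below works because the
third-party `regex` package mirrors the standard library's API for the
usual entry points while adding features such as match timeouts. A small
usage sketch (the pattern and values are illustrative):

```python
import regex as re

# Standard-library-style calls work unchanged.
m = re.match(r"(?P<key>\w+)=(?P<value>\d+)", "retries=3")
assert m is not None and m.group("key") == "retries"

# One practical extra: `regex` accepts a timeout, so a pathological
# pattern fails fast instead of hanging on catastrophic backtracking.
try:
    re.search(r"(a+)+$", "a" * 40 + "b", timeout=1.0)
except TimeoutError:
    print("search timed out instead of hanging")
```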


```diff
@@ -15,11 +15,11 @@
 #
 import json
-import re
 import socket
 import threading
 from contextlib import suppress
+import regex as re
 import torch
 from vllm.logger import logger
```


```diff
@@ -15,9 +15,9 @@
 #
 import os
-import re
 import socket
+import regex as re
 from vllm.logger import logger
```


```diff
@@ -40,10 +40,10 @@ Row parallel op follows a similar approach - inherit from RowColumnParallelOp an
 get_row_parallel_op.
 """
-import re
 from functools import lru_cache
 from types import SimpleNamespace
+import regex as re
 import torch
 import torch.distributed as dist
 import torch.nn.functional as F
```


```diff
@@ -18,7 +18,6 @@
 #
 import torch
-from triton.runtime import driver  # type: ignore
 from vllm.triton_utils import tl, triton
@@ -269,7 +268,9 @@ def linear_persistent(x, y):
     # Allocate output tensor (same data type as x)
     output = torch.zeros((M, N), dtype=x.dtype, device=x.device)
-    grid_size = driver.active.utils.get_device_properties(torch.npu.current_device())["num_vectorcore"] // 2
+    grid_size = (
+        triton.runtime.driver.active.utils.get_device_properties(torch.npu.current_device())["num_vectorcore"] // 2
+    )
     # Define block sizes (can be adjusted based on hardware)
     BLOCK_K = 256
```


```diff
@@ -18,7 +18,6 @@
 #
 import torch
-from triton.runtime import driver  # type: ignore
 from vllm.triton_utils import tl, triton
@@ -113,7 +112,9 @@ def rms_norm(
     output = torch.empty_like(input_2d, dtype=input_.dtype)
     BLOCK_SIZE = 1024
-    max_grid_size = driver.active.utils.get_device_properties(torch.npu.current_device())["num_vectorcore"]
+    max_grid_size = triton.runtime.driver.active.utils.get_device_properties(torch.npu.current_device())[
+        "num_vectorcore"
+    ]
     grid = (min(n_rows, max_grid_size),)
```
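
The `vllm.triton_utils` indirection used in this hunk and the ones below
centralizes triton availability handling, so modules import cleanly on
platforms where triton is absent. A sketch of the usage pattern, assuming
the module also exports a `HAS_TRITON` flag as upstream `vllm` does:

```python
from vllm.triton_utils import HAS_TRITON, tl, triton

if HAS_TRITON:

    @triton.jit
    def add_one_kernel(x_ptr, n_elements, BLOCK: tl.constexpr):
        # Each program instance increments one BLOCK-sized slice in place.
        offsets = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
        mask = offsets < n_elements
        x = tl.load(x_ptr + offsets, mask=mask)
        tl.store(x_ptr + offsets, x + 1, mask=mask)
```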


```diff
@@ -17,8 +17,7 @@
 import torch
-import triton  # type: ignore
-import triton.language as tl  # type: ignore
+from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
 from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
```


```diff
@@ -16,8 +16,7 @@
 #
 import torch
-import triton  # type: ignore
-import triton.language as tl  # type: ignore
+from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
 from vllm_ascend.ops.triton.triton_utils import extract_slice, get_element, get_vectorcore_num, insert_slice
```


```diff
@@ -11,10 +11,9 @@ from typing import Any
 import torch
 import torch.nn.functional as F
-import triton
-import triton.language as tl
 from vllm.distributed import get_pcp_group
 from vllm.forward_context import get_forward_context
+from vllm.triton_utils import tl, triton
 from vllm.v1.attention.backends.utils import PAD_SLOT_ID  # type: ignore
```


```diff
@@ -24,11 +24,11 @@ configs generated by the ModelSlim tool, along with model-specific mappings.
 import glob
 import json
 import os
-import re
 from collections.abc import Mapping
 from types import MappingProxyType
 from typing import Any, Optional
+import regex as re
 import torch
 from vllm.config import get_current_vllm_config
 from vllm.logger import logger
```


```diff
@@ -23,13 +23,13 @@ import atexit
 import functools
 import math
 import os
-import re
 from contextlib import nullcontext
 from enum import Enum
 from functools import lru_cache
 from threading import Lock
 from typing import TYPE_CHECKING, Any
+import regex as re
 import torch
 import torch_npu  # noqa: F401
 from packaging.version import InvalidVersion, Version
```