[Lint]Style: Convert root, benchmarks, tools and docs to ruff format (#5843)

### What this PR does / why we need it? Description This PR fixes linting issues in the root directory, benchmarks/, tools/ and docs/ to align with the project's Ruff configuration. This is part of a gradual effort to enable full linting coverage across the repository. The corresponding paths have been removed from the exclude list in pyproject.toml. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.13.0 - vLLM main: 2f4e6548ef --------- Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain> Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
2026-01-13 15:29:34 +08:00
parent 4b679984de
commit 523e83016b
14 changed files with 425 additions and 531 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,31 +6,12 @@ default_stages:
  - manual # Run in CI
 exclude: 'examples/.*' # Exclude examples from all hooks by default
 repos:
 - repo: https://github.com/codespell-project/codespell
  rev: v2.4.1
  hooks:
    - id: codespell
      args: [
        --toml, pyproject.toml,
        '--skip', 'csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
        '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND,tbe'
      ]
      additional_dependencies:
        - tomli
 - repo: https://github.com/google/yapf
  rev: v0.43.0
  hooks:
  - id: yapf
    args: [--in-place, --verbose]
    # Keep the same list from yapfignore here to avoid yapf failing without any inputs
    exclude: '(.github|benchmarks|examples|docs)/.*'
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.7
+  rev: v0.14.0
  hooks:
-  - id: ruff
+  - id: ruff-check
    args: [--output-format, github, --fix]
  - id: ruff-format
    files: ^(benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
  rev: v1.32.0
  hooks:
@@ -39,10 +20,6 @@ repos:
      "--force-exclude",
      "--exclude", "csrc/**"
    ]
 - repo: https://github.com/PyCQA/isort
  rev: 6.0.1
  hooks:
  - id: isort
 # - repo: https://github.com/pre-commit/mirrors-clang-format
 #   rev: v20.1.3
 #   hooks:
--- a/benchmarks/ops/ben_vocabparallelembedding.py
+++ b/benchmarks/ops/ben_vocabparallelembedding.py
@@ -1,5 +1,3 @@
 from typing import Tuple
 import numpy as np
 import pytest
 import torch
@@ -47,20 +45,12 @@ def get_masked_input_and_mask_ref(
    num_org_vocab_padding: int,
    added_vocab_start_index: int,
    added_vocab_end_index: int,
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
    """Reference implementation for verification"""
    org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
-    added_vocab_mask = (input_ >= added_vocab_start_index) & (
+    added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index)
-        input_ < added_vocab_end_index
+    added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
-    )
+    valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask)
    added_offset = (
        added_vocab_start_index
        - (org_vocab_end_index - org_vocab_start_index)
        - num_org_vocab_padding
    )
    valid_offset = (org_vocab_start_index * org_vocab_mask) + (
        added_offset * added_vocab_mask
    )
    vocab_mask = org_vocab_mask | added_vocab_mask
    masked_input = vocab_mask * (input_ - valid_offset)
    return masked_input, ~vocab_mask
@@ -78,7 +68,7 @@ SEEDS = [0]
@pytest.mark.parametrize("seed", SEEDS)
@torch.inference_mode()
 def test_get_masked_input_and_mask(
-    shape: Tuple[int, ...],
+    shape: tuple[int, ...],
    dtype: torch.dtype,
    device: str,
    seed: int,
--- a/benchmarks/scripts/convert_json_to_markdown.py
+++ b/benchmarks/scripts/convert_json_to_markdown.py
@@ -59,9 +59,7 @@ def results_to_json(latency, throughput, serving):
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
+    parser = argparse.ArgumentParser(description="Process the results of the benchmark tests.")
        description="Process the results of the benchmark tests."
    )
    parser.add_argument(
        "--results_folder",
        type=str,
@@ -80,12 +78,8 @@ if __name__ == "__main__":
        default="./perf_result_template.md",
        help="The template file for the markdown report.",
    )
-    parser.add_argument(
+    parser.add_argument("--tag", default="main", help="Tag to be used for release message.")
-        "--tag", default="main", help="Tag to be used for release message."
+    parser.add_argument("--commit_id", default="", help="Commit ID to be used for release message.")
    )
    parser.add_argument(
        "--commit_id", default="", help="Commit ID to be used for release message."
    )
    args = parser.parse_args()
    results_folder = (CUR_PATH / args.results_folder).resolve()
@@ -116,9 +110,7 @@ if __name__ == "__main__":
            # get different percentiles
            for perc in [10, 25, 50, 75, 90, 99]:
                # Multiply by 1000 to convert the time unit from s to ms
-                raw_result.update(
+                raw_result.update({f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
                    {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
                )
            raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
            # add the result to raw_result
@@ -142,38 +134,24 @@ if __name__ == "__main__":
    serving_results = pd.DataFrame.from_dict(serving_results)
    throughput_results = pd.DataFrame.from_dict(throughput_results)
-    raw_results_json = results_to_json(
+    raw_results_json = results_to_json(latency_results, throughput_results, serving_results)
        latency_results, throughput_results, serving_results
    )
    # remapping the key, for visualization purpose
    if not latency_results.empty:
-        latency_results = latency_results[list(latency_column_mapping.keys())].rename(
+        latency_results = latency_results[list(latency_column_mapping.keys())].rename(columns=latency_column_mapping)
            columns=latency_column_mapping
        )
    if not serving_results.empty:
-        serving_results = serving_results[list(serving_column_mapping.keys())].rename(
+        serving_results = serving_results[list(serving_column_mapping.keys())].rename(columns=serving_column_mapping)
            columns=serving_column_mapping
        )
    if not throughput_results.empty:
-        throughput_results = throughput_results[
+        throughput_results = throughput_results[list(throughput_results_column_mapping.keys())].rename(
-            list(throughput_results_column_mapping.keys())
+            columns=throughput_results_column_mapping
-        ].rename(columns=throughput_results_column_mapping)
+        )
-    processed_results_json = results_to_json(
+    processed_results_json = results_to_json(latency_results, throughput_results, serving_results)
        latency_results, throughput_results, serving_results
    )
    # get markdown tables
-    latency_md_table = tabulate(
+    latency_md_table = tabulate(latency_results, headers="keys", tablefmt="pipe", showindex=False)
-        latency_results, headers="keys", tablefmt="pipe", showindex=False
+    serving_md_table = tabulate(serving_results, headers="keys", tablefmt="pipe", showindex=False)
-    )
+    throughput_md_table = tabulate(throughput_results, headers="keys", tablefmt="pipe", showindex=False)
    serving_md_table = tabulate(
        serving_results, headers="keys", tablefmt="pipe", showindex=False
    )
    throughput_md_table = tabulate(
        throughput_results, headers="keys", tablefmt="pipe", showindex=False
    )
    # document the result
    print(output_folder)
--- a/collect_env.py
+++ b/collect_env.py
@@ -27,33 +27,35 @@ from vllm.envs import environment_variables
 try:
    import torch
    TORCH_AVAILABLE = True
 except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False
 # System Environment Information
 SystemEnv = namedtuple(
-    'SystemEnv',
+    "SystemEnv",
    [
-        'torch_version',
+        "torch_version",
-        'is_debug_build',
+        "is_debug_build",
-        'gcc_version',
+        "gcc_version",
-        'clang_version',
+        "clang_version",
-        'cmake_version',
+        "cmake_version",
-        'os',
+        "os",
-        'libc_version',
+        "libc_version",
-        'python_version',
+        "python_version",
-        'python_platform',
+        "python_platform",
-        'pip_version',  # 'pip' or 'pip3'
+        "pip_version",  # 'pip' or 'pip3'
-        'pip_packages',
+        "pip_packages",
-        'conda_packages',
+        "conda_packages",
-        'cpu_info',
+        "cpu_info",
-        'vllm_version',  # vllm specific field
+        "vllm_version",  # vllm specific field
-        'vllm_ascend_version',  # vllm ascend specific field
+        "vllm_ascend_version",  # vllm ascend specific field
-        'env_vars',
+        "env_vars",
-        'npu_info',  # ascend specific field
+        "npu_info",  # ascend specific field
-        'cann_info',  # ascend specific field
+        "cann_info",  # ascend specific field
-    ])
+    ],
 )
 DEFAULT_CONDA_PATTERNS = {
    "torch",
@@ -82,15 +84,12 @@ DEFAULT_PIP_PATTERNS = {
 def run(command):
    """Return (return-code, stdout, stderr)."""
-    shell = True if type(command) is str else False
+    shell = isinstance(command, str)
-    p = subprocess.Popen(command,
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell)
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=shell)
    raw_output, raw_err = p.communicate()
    rc = p.returncode
-    if get_platform() == 'win32':
+    if get_platform() == "win32":
-        enc = 'oem'
+        enc = "oem"
    else:
        enc = locale.getpreferredencoding()
    output = raw_output.decode(enc)
@@ -122,42 +121,40 @@ def run_and_return_first_line(run_lambda, command):
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
-    return out.split('\n')[0]
+    return out.split("\n")[0]
 def get_conda_packages(run_lambda, patterns=None):
    if patterns is None:
        patterns = DEFAULT_CONDA_PATTERNS
-    conda = os.environ.get('CONDA_EXE', 'conda')
+    conda = os.environ.get("CONDA_EXE", "conda")
    out = run_and_read_all(run_lambda, "{} list".format(conda))
    if out is None:
        return out
-    return "\n".join(line for line in out.splitlines()
+    return "\n".join(
-                     if not line.startswith("#") and any(name in line
+        line for line in out.splitlines() if not line.startswith("#") and any(name in line for name in patterns)
-                                                         for name in patterns))
+    )
 def get_gcc_version(run_lambda):
-    return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
+    return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)")
 def get_clang_version(run_lambda):
-    return run_and_parse_first_match(run_lambda, 'clang --version',
+    return run_and_parse_first_match(run_lambda, "clang --version", r"clang version (.*)")
                                     r'clang version (.*)')
 def get_cmake_version(run_lambda):
-    return run_and_parse_first_match(run_lambda, 'cmake --version',
+    return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)")
                                     r'cmake (.*)')
 def _parse_version(version, version_tuple):
    version_str = version_tuple[-1]
-    if isinstance(version_str, str) and version_str.startswith('g'):
+    if isinstance(version_str, str) and version_str.startswith("g"):
-        if '.' in version_str:
+        if "." in version_str:
-            git_sha = version_str.split('.')[0][1:]
+            git_sha = version_str.split(".")[0][1:]
-            date = version_str.split('.')[-1][1:]
+            date = version_str.split(".")[-1][1:]
            return f"{version} (git sha: {git_sha}, date: {date})"
        else:
            git_sha = version_str[1:]  # type: ignore
@@ -167,26 +164,28 @@ def _parse_version(version, version_tuple):
 def get_vllm_version():
    from vllm import __version__, __version_tuple__
    return _parse_version(__version__, __version_tuple__)
 def get_vllm_ascend_version():
    from vllm_ascend._version import __version__, __version_tuple__
    return _parse_version(__version__, __version_tuple__)
 def get_cpu_info(run_lambda):
-    rc, out, err = 0, '', ''
+    rc, out, err = 0, "", ""
-    if get_platform() == 'linux':
+    if get_platform() == "linux":
-        rc, out, err = run_lambda('lscpu')
+        rc, out, err = run_lambda("lscpu")
-    elif get_platform() == 'win32':
+    elif get_platform() == "win32":
        rc, out, err = run_lambda(
-            'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \
+            "wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \
-        CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE'
+        CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE"
        )
-    elif get_platform() == 'darwin':
+    elif get_platform() == "darwin":
        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
-    cpu_info = 'None'
+    cpu_info = "None"
    if rc == 0:
        cpu_info = out
    else:
@@ -195,67 +194,63 @@ def get_cpu_info(run_lambda):
 def get_platform():
-    if sys.platform.startswith('linux'):
+    if sys.platform.startswith("linux"):
-        return 'linux'
+        return "linux"
-    elif sys.platform.startswith('win32'):
+    elif sys.platform.startswith("win32"):
-        return 'win32'
+        return "win32"
-    elif sys.platform.startswith('cygwin'):
+    elif sys.platform.startswith("cygwin"):
-        return 'cygwin'
+        return "cygwin"
-    elif sys.platform.startswith('darwin'):
+    elif sys.platform.startswith("darwin"):
-        return 'darwin'
+        return "darwin"
    else:
        return sys.platform
 def get_mac_version(run_lambda):
-    return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion',
+    return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)")
                                     r'(.*)')
 def get_windows_version(run_lambda):
-    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
+    system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
-    wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic')
+    wmic_cmd = os.path.join(system_root, "System32", "Wbem", "wmic")
-    findstr_cmd = os.path.join(system_root, 'System32', 'findstr')
+    findstr_cmd = os.path.join(system_root, "System32", "findstr")
-    return run_and_read_all(
+    return run_and_read_all(run_lambda, "{} os get Caption | {} /v Caption".format(wmic_cmd, findstr_cmd))
        run_lambda,
        '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd))
 def get_lsb_version(run_lambda):
-    return run_and_parse_first_match(run_lambda, 'lsb_release -a',
+    return run_and_parse_first_match(run_lambda, "lsb_release -a", r"Description:\t(.*)")
                                     r'Description:\t(.*)')
 def check_release_file(run_lambda):
-    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
+    return run_and_parse_first_match(run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"')
                                     r'PRETTY_NAME="(.*)"')
 def get_os(run_lambda):
    from platform import machine
    platform = get_platform()
-    if platform == 'win32' or platform == 'cygwin':
+    if platform == "win32" or platform == "cygwin":
        return get_windows_version(run_lambda)
-    if platform == 'darwin':
+    if platform == "darwin":
        version = get_mac_version(run_lambda)
        if version is None:
            return None
-        return 'macOS {} ({})'.format(version, machine())
+        return "macOS {} ({})".format(version, machine())
-    if platform == 'linux':
+    if platform == "linux":
        # Ubuntu/Debian based
        desc = get_lsb_version(run_lambda)
        if desc is not None:
-            return '{} ({})'.format(desc, machine())
+            return "{} ({})".format(desc, machine())
        # Try reading /etc/*-release
        desc = check_release_file(run_lambda)
        if desc is not None:
-            return '{} ({})'.format(desc, machine())
+            return "{} ({})".format(desc, machine())
-        return '{} ({})'.format(platform, machine())
+        return "{} ({})".format(platform, machine())
    # Unknown platform
    return platform
@@ -263,14 +258,16 @@ def get_os(run_lambda):
 def get_python_platform():
    import platform
    return platform.platform()
 def get_libc_version():
    import platform
-    if get_platform() != 'linux':
+
-        return 'N/A'
+    if get_platform() != "linux":
-    return '-'.join(platform.libc_ver())
+        return "N/A"
    return "-".join(platform.libc_ver())
 def get_pip_packages(run_lambda, patterns=None):
@@ -282,31 +279,29 @@ def get_pip_packages(run_lambda, patterns=None):
    # But here it is invoked as `python -mpip`
    def run_with_pip(pip):
        out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"])
-        return "\n".join(line for line in out.splitlines()
+        return "\n".join(line for line in out.splitlines() if any(name in line for name in patterns))
                         if any(name in line for name in patterns))
-    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'
+    pip_version = "pip3" if sys.version[0] == "3" else "pip"
-    out = run_with_pip([sys.executable, '-mpip'])
+    out = run_with_pip([sys.executable, "-mpip"])
    return pip_version, out
 def get_npu_info(run_lambda):
-    return run_and_read_all(run_lambda, 'npu-smi info')
+    return run_and_read_all(run_lambda, "npu-smi info")
 def get_cann_info(run_lambda):
-    out = run_and_read_all(run_lambda, 'lscpu | grep Architecture:')
+    out = run_and_read_all(run_lambda, "lscpu | grep Architecture:")
    cpu_arch = str(out).split()[-1]
    return run_and_read_all(
-        run_lambda,
+        run_lambda, "cat /usr/local/Ascend/ascend-toolkit/latest/{}-linux/ascend_toolkit_install.info".format(cpu_arch)
-        'cat /usr/local/Ascend/ascend-toolkit/latest/{}-linux/ascend_toolkit_install.info'
+    )
        .format(cpu_arch))
 def get_env_vars():
-    env_vars = ''
+    env_vars = ""
-    secret_terms = ('secret', 'token', 'api', 'access', 'password')
+    secret_terms = ("secret", "token", "api", "access", "password")
    report_prefix = ("TORCH", "PYTORCH", "ASCEND_", "ATB_")
    for k, v in os.environ.items():
        if any(term in k.lower() for term in secret_terms):
@@ -327,7 +322,7 @@ def get_env_info():
        version_str = torch.__version__
        debug_mode_str = str(torch.version.debug)
    else:
-        version_str = debug_mode_str = 'N/A'
+        version_str = debug_mode_str = "N/A"
    sys_version = sys.version.replace("\n", " ")
@@ -336,9 +331,7 @@ def get_env_info():
    return SystemEnv(
        torch_version=version_str,
        is_debug_build=debug_mode_str,
-        python_version='{} ({}-bit runtime)'.format(
+        python_version="{} ({}-bit runtime)".format(sys_version, sys.maxsize.bit_length() + 1),
            sys_version,
            sys.maxsize.bit_length() + 1),
        python_platform=get_python_platform(),
        pip_version=pip_version,
        pip_packages=pip_list_output,
@@ -399,36 +392,35 @@ CANN:
 def pretty_str(envinfo):
-
+    def replace_nones(dct, replacement="Could not collect"):
-    def replace_nones(dct, replacement='Could not collect'):
+        for key in dct:
        for key in dct.keys():
            if dct[key] is not None:
                continue
            dct[key] = replacement
        return dct
-    def replace_bools(dct, true='Yes', false='No'):
+    def replace_bools(dct, true="Yes", false="No"):
-        for key in dct.keys():
+        for key in dct:
            if dct[key] is True:
                dct[key] = true
            elif dct[key] is False:
                dct[key] = false
        return dct
-    def prepend(text, tag='[prepend]'):
+    def prepend(text, tag="[prepend]"):
-        lines = text.split('\n')
+        lines = text.split("\n")
        updated_lines = [tag + line for line in lines]
-        return '\n'.join(updated_lines)
+        return "\n".join(updated_lines)
-    def replace_if_empty(text, replacement='No relevant packages'):
+    def replace_if_empty(text, replacement="No relevant packages"):
        if text is not None and len(text) == 0:
            return replacement
        return text
    def maybe_start_on_next_line(string):
        # If `string` is multiline, prepend a \n to it.
-        if string is not None and len(string.split('\n')) > 1:
+        if string is not None and len(string.split("\n")) > 1:
-            return '\n{}\n'.format(string)
+            return "\n{}\n".format(string)
        return string
    mutable_dict = envinfo._asdict()
@@ -440,22 +432,18 @@ def pretty_str(envinfo):
    mutable_dict = replace_nones(mutable_dict)
    # If either of these are '', replace with 'No relevant packages'
-    mutable_dict['pip_packages'] = replace_if_empty(
+    mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"])
-        mutable_dict['pip_packages'])
+    mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"])
    mutable_dict['conda_packages'] = replace_if_empty(
        mutable_dict['conda_packages'])
    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as e.g. '[conda] Could not collect'
-    if mutable_dict['pip_packages']:
+    if mutable_dict["pip_packages"]:
-        mutable_dict['pip_packages'] = prepend(
+        mutable_dict["pip_packages"] = prepend(mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version))
-            mutable_dict['pip_packages'], '[{}] '.format(envinfo.pip_version))
+    if mutable_dict["conda_packages"]:
-    if mutable_dict['conda_packages']:
+        mutable_dict["conda_packages"] = prepend(mutable_dict["conda_packages"], "[conda] ")
-        mutable_dict['conda_packages'] = prepend(
+    mutable_dict["cpu_info"] = envinfo.cpu_info
-            mutable_dict['conda_packages'], '[conda] ')
+    mutable_dict["npu_info"] = envinfo.npu_info
-    mutable_dict['cpu_info'] = envinfo.cpu_info
+    mutable_dict["cann_info"] = envinfo.cann_info
    mutable_dict['npu_info'] = envinfo.npu_info
    mutable_dict['cann_info'] = envinfo.cann_info
    return env_info_fmt.format(**mutable_dict)
@@ -468,22 +456,19 @@ def main():
    output = get_pretty_env_info()
    print(output)
-    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(
+    if TORCH_AVAILABLE and hasattr(torch, "utils") and hasattr(torch.utils, "_crash_handler"):
            torch.utils, '_crash_handler'):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        if sys.platform == "linux" and os.path.exists(minidump_dir):
-            dumps = [
+            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
                os.path.join(minidump_dir, dump)
                for dump in os.listdir(minidump_dir)
            ]
            latest = max(dumps, key=os.path.getctime)
            ctime = os.path.getctime(latest)
-            creation_time = datetime.datetime.fromtimestamp(ctime).strftime(
+            creation_time = datetime.datetime.fromtimestamp(ctime).strftime("%Y-%m-%d %H:%M:%S")
-                '%Y-%m-%d %H:%M:%S')
+            msg = (
-            msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
+                "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time)
-                  "if this is related to your bug please include it when you file a report ***"
+                + "if this is related to your bug please include it when you file a report ***"
            )
            print(msg, file=sys.stderr)
-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -31,12 +31,12 @@ import os
 # -- Project information -----------------------------------------------------
-project = 'vllm-ascend'
+project = "vllm-ascend"
-copyright = '2025, vllm-ascend team'
+copyright = "2025, vllm-ascend team"
-author = 'the vllm-ascend team'
+author = "the vllm-ascend team"
 # The full version, including alpha/beta/rc tags
-release = ''
+release = ""
 # -- General configuration ---------------------------------------------------
@@ -65,46 +65,46 @@ myst_substitutions = {
    # the branch of vllm, used in vllm clone
    # - main branch: 'main'
    # - vX.Y.Z branch: 'vX.Y.Z'
-    'vllm_version': 'v0.13.0',
+    "vllm_version": "v0.13.0",
    # the branch of vllm-ascend, used in vllm-ascend clone and image tag
    # - main branch: 'main'
    # - vX.Y.Z branch: latest vllm-ascend release tag
-    'vllm_ascend_version': 'v0.13.0rc1',
+    "vllm_ascend_version": "v0.13.0rc1",
    # the newest release version of vllm-ascend and matched vLLM, used in pip install.
    # This value should be updated when cutting a release.
-    'pip_vllm_ascend_version': "0.13.0rc1",
+    "pip_vllm_ascend_version": "0.13.0rc1",
-    'pip_vllm_version': "0.13.0",
+    "pip_vllm_version": "0.13.0",
    # CANN image tag
-    'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11",
+    "cann_image_tag": "8.3.rc2-910b-ubuntu22.04-py3.11",
    # vllm version in ci
-    'ci_vllm_version': 'v0.13.0',
+    "ci_vllm_version": "v0.13.0",
 }
 # For cross-file header anchors
 myst_heading_anchors = 5
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-locale_dirs = ['locale/']   
+locale_dirs = ["locale/"]
 gettext_compact = False
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = [
-    '_build',
+    "_build",
-    'Thumbs.db',
+    "Thumbs.db",
-    '.DS_Store',
+    ".DS_Store",
-    '.venv',
+    ".venv",
-    'README.md',
+    "README.md",
-    'user_guide/release.template.md',
+    "user_guide/release.template.md",
    # TODO(yikun): Remove this after zh supported
-    '**/*.zh.md'
+    "**/*.zh.md",
 ]
 # -- Options for HTML output -------------------------------------------------
@@ -113,24 +113,23 @@ exclude_patterns = [
 # a list of builtin themes.
 #
 html_title = project
-html_theme = 'sphinx_book_theme'
+html_theme = "sphinx_book_theme"
-html_logo = 'logos/vllm-ascend-logo-text-light.png'
+html_logo = "logos/vllm-ascend-logo-text-light.png"
 html_theme_options = {
-    'path_to_docs': 'docs/source',
+    "path_to_docs": "docs/source",
-    'repository_url': 'https://github.com/vllm-project/vllm-ascend',
+    "repository_url": "https://github.com/vllm-project/vllm-ascend",
-    'use_repository_button': True,
+    "use_repository_button": True,
-    'use_edit_page_button': True,
+    "use_edit_page_button": True,
 }
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ['_static']
-READTHEDOCS_VERSION_TYPE = os.environ.get('READTHEDOCS_VERSION_TYPE')
+READTHEDOCS_VERSION_TYPE = os.environ.get("READTHEDOCS_VERSION_TYPE")
 if READTHEDOCS_VERSION_TYPE == "tag":
    # remove the warning banner if the version is a tagged release
-    header_file = os.path.join(os.path.dirname(__file__),
+    header_file = os.path.join(os.path.dirname(__file__), "_templates/sections/header.html")
                               "_templates/sections/header.html")
    # The file might be removed already if the build is triggered multiple times
    # (readthedocs builds both HTML and PDF versions separately)
    if os.path.exists(header_file):
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,3 +42,51 @@ plugins.md033.enabled = false # inline-html
 plugins.md046.enabled = false # code-block-style
 plugins.md024.allow_different_nesting = true # no-duplicate-headers
 plugins.md029.enabled = false # ol-prefix
 [tool.ruff]
 # TODO: according to PEP 8, lines should be limited to 79 characters
 line-length = 120
 # Folder to be modified
 exclude = [
    "examples/**",
    "tests/**",
    "vllm_ascend/**",
 ]
 [tool.ruff.lint]
 select = [
    # pycodestyle
    "E",
    # Pyflakes
    "F",
    # pyupgrade
    "UP",
    # flake8-bugbear
    "B",
    # flake8-simplify
    "SIM",
    # isort
    "I",
    # flake8-logging-format
    "G",
 ]
 ignore = [
    # star imports
    "F405", "F403",
    # lambda expression assignment
    "E731",
    # zip without `strict=`
    "B905",
    # Loop control variable not used within loop body
    "B007",
    # f-string format
    "UP032",
    # TODO: FIX ME
    "G004",
    "B904",
    "SIM108",
    "SIM102"
 ]
 [tool.ruff.format]
 docstring-code-format = true
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,6 @@ import os
 import subprocess
 import sys
 from sysconfig import get_paths
 from typing import Dict, List
 from setuptools import Command, Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
@@ -45,15 +44,13 @@ ROOT_DIR = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
-def check_or_set_default_env(cmake_args,
+def check_or_set_default_env(cmake_args, env_name, env_variable, default_path=""):
                             env_name,
                             env_variable,
                             default_path=""):
    if env_variable is None:
        logging.warning(
            f"No {env_name} found in your environment, pleause try to set {env_name} "
            "if you customize the installation path of this library, otherwise default "
-            "path will be adapted during build this project")
+            "path will be adapted during build this project"
        )
        logging.warning(f"Set default {env_name}: {default_path}")
        env_variable = default_path
    else:
@@ -65,25 +62,27 @@ def check_or_set_default_env(cmake_args,
    return cmake_args
-def get_value_from_lines(lines: List[str], key: str) -> str:
+def get_value_from_lines(lines: list[str], key: str) -> str:
    for line in lines:
-        line = ' '.join(line.split())
+        line = " ".join(line.split())
        if key in line:
-            return line.split(':')[-1].strip()
+            return line.split(":")[-1].strip()
    return ""
 def get_chip_type() -> str:
    try:
-        npu_info_lines = subprocess.check_output(
+        npu_info_lines = subprocess.check_output(["npu-smi", "info", "-l"]).decode().strip().split("\n")
-            ['npu-smi', 'info', '-l']).decode().strip().split('\n')
+        npu_id = int(get_value_from_lines(npu_info_lines, "NPU ID"))
-        npu_id = int(get_value_from_lines(npu_info_lines, 'NPU ID'))
+        chip_info_lines = (
-        chip_info_lines = subprocess.check_output(
+            subprocess.check_output(["npu-smi", "info", "-t", "board", "-i", str(npu_id), "-c", "0"])
-            ['npu-smi', 'info', '-t', 'board', '-i',
+            .decode()
-             str(npu_id), '-c', '0']).decode().strip().split('\n')
+            .strip()
-        chip_name = get_value_from_lines(chip_info_lines, 'Chip Name')
+            .split("\n")
-        chip_type = get_value_from_lines(chip_info_lines, 'Chip Type')
+        )
-        npu_name = get_value_from_lines(chip_info_lines, 'NPU Name')
+        chip_name = get_value_from_lines(chip_info_lines, "Chip Name")
        chip_type = get_value_from_lines(chip_info_lines, "Chip Type")
        npu_name = get_value_from_lines(chip_info_lines, "NPU Name")
        if "310" in chip_name:
            # 310P case
@@ -97,12 +96,10 @@ def get_chip_type() -> str:
            else:
                # A3 case
                assert npu_name
-                return (chip_name + '_' + npu_name).lower()
+                return (chip_name + "_" + npu_name).lower()
        else:
            # TODO(zzzzwwjj): Currently, A5's chip name has not determined yet.
-            raise ValueError(
+            raise ValueError(f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION")
                f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION"
            )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Get chip info failed: {e}")
    except FileNotFoundError:
@@ -112,8 +109,7 @@ def get_chip_type() -> str:
        return ""
-envs = load_module_from_path("envs",
+envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_ascend", "envs.py"))
                             os.path.join(ROOT_DIR, "vllm_ascend", "envs.py"))
 soc_version = get_chip_type()
@@ -126,10 +122,8 @@ if not envs.SOC_VERSION:
        )
    envs.SOC_VERSION = soc_version
 else:
-    if soc_version and envs.SOC_VERSION != soc_version:
+    if soc_version and soc_version != envs.SOC_VERSION:
-        logging.warning(
+        logging.warning(f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}")
            f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}"
        )
 def gen_build_info():
@@ -167,30 +161,24 @@ def gen_build_info():
    package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py")
    with open(package_dir, "w+") as f:
-        f.write('# Auto-generated file\n')
+        f.write("# Auto-generated file\n")
        f.write(f"__device_type__ = '{device_type}'\n")
    logging.info(f"Generated _build_info.py with SOC version: {soc_version}")
 class CMakeExtension(Extension):
-
+    def __init__(self, name: str, cmake_lists_dir: str = ".", **kwargs) -> None:
    def __init__(self,
                 name: str,
                 cmake_lists_dir: str = ".",
                 **kwargs) -> None:
        super().__init__(name, sources=[], py_limited_api=False, **kwargs)
        self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)
 class custom_develop(develop):
    def run(self):
        gen_build_info()
        super().run()
 class custom_build_info(build_py):
    def run(self):
        gen_build_info()
        super().run()
@@ -209,8 +197,7 @@ class build_and_install_aclnn(Command):
    def run(self):
        try:
            print("Running bash build_aclnn.sh ...")
-            subprocess.check_call(
+            subprocess.check_call(["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
                ["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
            print("buid_aclnn.sh executed successfully!")
        except subprocess.CalledProcessError as e:
            print(f"Error running build_aclnn.sh: {e}")
@@ -219,7 +206,7 @@ class build_and_install_aclnn(Command):
 class cmake_build_ext(build_ext):
    # A dict of extension directories that have been configured.
-    did_config: Dict[str, bool] = {}
+    did_config: dict[str, bool] = {}
    #
    # Determine number of compilation jobs
@@ -254,9 +241,9 @@ class cmake_build_ext(build_ext):
        # Default use release mode to compile the csrc code
        # Turbo now support compiled with Release, Debug and RelWithDebugInfo
        if envs.CMAKE_BUILD_TYPE is None or envs.CMAKE_BUILD_TYPE not in [
-                "Debug",
+            "Debug",
-                "Release",
+            "Release",
-                "RelWithDebugInfo",
+            "RelWithDebugInfo",
        ]:
            envs.CMAKE_BUILD_TYPE = "Release"
        cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"]
@@ -278,20 +265,18 @@ class cmake_build_ext(build_ext):
        )
        # find PYTHON_EXECUTABLE
-        check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE",
+        check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE", sys.executable)
                                 sys.executable)
        # find PYTHON_INCLUDE_PATH
-        check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH",
+        check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH", get_paths()["include"])
                                 get_paths()["include"])
        # ccache and ninja can not be applied at ascendc kernels now
        try:
            # if pybind11 is installed via pip
-            pybind11_cmake_path = (subprocess.check_output(
+            pybind11_cmake_path = (
-                [python_executable, "-m", "pybind11",
+                subprocess.check_output([python_executable, "-m", "pybind11", "--cmakedir"]).decode().strip()
-                 "--cmakedir"]).decode().strip())
+            )
        except subprocess.CalledProcessError as e:
            # else specify pybind11 path installed from source code on CI container
            raise RuntimeError(f"CMake configuration failed: {e}")
@@ -309,8 +294,7 @@ class cmake_build_ext(build_ext):
            "910c": "ascend910_9392",
            "310p": "ascend310p1",
        }
-        CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION,
+        CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION, envs.SOC_VERSION)
                                               envs.SOC_VERSION)
        cmake_args += [f"-DSOC_VERSION={CANN_SOC_VERSION}"]
        # Override the base directory for FetchContent downloads to $ROOT/.deps
@@ -323,8 +307,7 @@ class cmake_build_ext(build_ext):
        torch_npu_command = "python3 -m pip show torch-npu | grep '^Location:' | awk '{print $2}'"
        try:
-            torch_npu_path = subprocess.check_output(
+            torch_npu_path = subprocess.check_output(torch_npu_command, shell=True).decode().strip()
                torch_npu_command, shell=True).decode().strip()
            torch_npu_path += "/torch_npu"
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Retrieve torch version version failed: {e}")
@@ -399,22 +382,21 @@ class cmake_build_ext(build_ext):
        # copy back to build folder for editable build
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            import shutil
            for root, _, files in os.walk(self.build_temp):
                for file in files:
                    if file.endswith(".so"):
                        src_path = os.path.join(root, file)
-                        dst_path = os.path.join(self.build_lib, "vllm_ascend",
+                        dst_path = os.path.join(self.build_lib, "vllm_ascend", file)
                                                file)
                        shutil.copy(src_path, dst_path)
                        print(f"Copy: {src_path} -> {dst_path}")
        # copy back _cann_ops_custom directory
-        src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend",
+        src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend", "_cann_ops_custom")
-                                           "_cann_ops_custom")
+        dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend", "_cann_ops_custom")
        dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend",
                                           "_cann_ops_custom")
        if os.path.exists(src_cann_ops_custom):
            import shutil
            if os.path.exists(dst_cann_ops_custom):
                shutil.rmtree(dst_cann_ops_custom)
            shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom)
@@ -428,7 +410,6 @@ class cmake_build_ext(build_ext):
 class custom_install(install):
    def run(self):
        self.run_command("build_ext")
        install.run(self)
@@ -459,10 +440,10 @@ def read_readme() -> str:
        return ""
-def get_requirements() -> List[str]:
+def get_requirements() -> list[str]:
    """Get Python package dependencies from requirements.txt."""
-    def _read_requirements(filename: str) -> List[str]:
+    def _read_requirements(filename: str) -> list[str]:
        with open(get_path(filename)) as f:
            requirements = f.read().strip().split("\n")
        resolved_requirements = []
@@ -487,7 +468,7 @@ cmdclass = {
    "build_py": custom_build_info,
    "build_aclnn": build_and_install_aclnn,
    "build_ext": cmake_build_ext,
-    "install": custom_install
+    "install": custom_install,
 }
 setup(
@@ -526,7 +507,7 @@ setup(
        "vllm.general_plugins": [
            "ascend_kv_connector = vllm_ascend:register_connector",
            "ascend_model_loader = vllm_ascend:register_model_loader",
-            "ascend_service_profiling = vllm_ascend:register_service_profiling"
+            "ascend_service_profiling = vllm_ascend:register_service_profiling",
        ],
    },
 )
--- a/tools/aisbench.py
+++ b/tools/aisbench.py
@@ -29,60 +29,47 @@ import pandas as pd
 from modelscope import snapshot_download  # type: ignore
 BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath("./benchmark"))
-DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark",
+DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", "configs", "datasets")
-                                "configs", "datasets")
+REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", "configs", "models", "vllm_api")
 REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark",
                                "configs", "models", "vllm_api")
 DATASET_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "datasets")
 class AisbenchRunner:
-    RESULT_MSG = {
+    RESULT_MSG = {"performance": "Performance Result files locate in ", "accuracy": "write csv to "}
-        "performance": "Performance Result files locate in ",
+    DATASET_RENAME = {"aime2024": "aime", "gsm8k-lite": "gsm8k", "textvqa-lite": "textvqa"}
        "accuracy": "write csv to "
    }
    DATASET_RENAME = {
        "aime2024": "aime",
        "gsm8k-lite": "gsm8k",
        "textvqa-lite": "textvqa"
    }
    def _run_aisbench_task(self):
-        dataset_conf = self.dataset_conf.split('/')[-1]
+        dataset_conf = self.dataset_conf.split("/")[-1]
        if self.task_type == "accuracy":
-            aisbench_cmd = [
+            aisbench_cmd = ["ais_bench", "--models", f"{self.request_conf}_custom", "--datasets", f"{dataset_conf}"]
                'ais_bench', '--models', f'{self.request_conf}_custom',
                '--datasets', f'{dataset_conf}'
            ]
        if self.task_type == "performance":
            aisbench_cmd = [
-                'ais_bench', '--models', f'{self.request_conf}_custom',
+                "ais_bench",
-                '--datasets', f'{dataset_conf}_custom', '--mode', 'perf'
+                "--models",
                f"{self.request_conf}_custom",
                "--datasets",
                f"{dataset_conf}_custom",
                "--mode",
                "perf",
            ]
            if self.num_prompts:
-                aisbench_cmd.extend(['--num-prompts', str(self.num_prompts)])
+                aisbench_cmd.extend(["--num-prompts", str(self.num_prompts)])
        print(f"running aisbench cmd: {' '.join(aisbench_cmd)}")
-        self.proc: subprocess.Popen = subprocess.Popen(aisbench_cmd,
+        self.proc: subprocess.Popen = subprocess.Popen(
-                                                       stdout=subprocess.PIPE,
+            aisbench_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
-                                                       stderr=subprocess.PIPE,
+        )
                                                       text=True)
-    def __init__(self,
+    def __init__(self, model: str, port: int, aisbench_config: dict, host_ip: str = "localhost", verify=True):
                 model: str,
                 port: int,
                 aisbench_config: dict,
                 host_ip: str = "localhost",
                 verify=True):
        self.model = model
        self.dataset_path = aisbench_config.get("dataset_path_local")
        if not self.dataset_path:
-            self.dataset_path = maybe_download_from_modelscope(
+            self.dataset_path = maybe_download_from_modelscope(aisbench_config["dataset_path"], repo_type="dataset")
                aisbench_config["dataset_path"], repo_type="dataset")
        self.model_path = aisbench_config.get("model_path")
        if not self.model_path:
            self.model_path = maybe_download_from_modelscope(model)
-        assert self.dataset_path is not None and self.model_path is not None, \
+        assert self.dataset_path is not None and self.model_path is not None, (
            f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
        )
        self.port = port
        self.host_ip = host_ip
        self.task_type = aisbench_config["case_type"]
@@ -92,8 +79,7 @@ class AisbenchRunner:
        self.max_out_len = aisbench_config["max_out_len"]
        self.batch_size = aisbench_config["batch_size"]
        self.request_rate = aisbench_config.get("request_rate", 0)
-        self.trust_remote_code = aisbench_config.get("trust_remote_code",
+        self.trust_remote_code = aisbench_config.get("trust_remote_code", False)
                                                     False)
        self.temperature = aisbench_config.get("temperature")
        self.top_k = aisbench_config.get("top_k")
        self.top_p = aisbench_config.get("top_p")
@@ -122,52 +108,38 @@ class AisbenchRunner:
            command = ["cp", "-r", self.dataset_path, dst_dir]
            subprocess.call(command)
        if self.task_type == "performance":
-            conf_path = os.path.join(DATASET_CONF_DIR,
+            conf_path = os.path.join(DATASET_CONF_DIR, f"{self.dataset_conf}.py")
                                     f'{self.dataset_conf}.py')
            if self.dataset_conf.startswith("textvqa"):
-                self.dataset_path = os.path.join(self.dataset_path,
+                self.dataset_path = os.path.join(self.dataset_path, "textvqa_val.jsonl")
-                                                 "textvqa_val.jsonl")
+            with open(conf_path, encoding="utf-8") as f:
            with open(conf_path, 'r', encoding='utf-8') as f:
                content = f.read()
-            content = re.sub(r'path=.*', f'path="{self.dataset_path}",',
+            content = re.sub(r"path=.*", f'path="{self.dataset_path}",', content)
-                             content)
+            conf_path_new = os.path.join(DATASET_CONF_DIR, f"{self.dataset_conf}_custom.py")
-            conf_path_new = os.path.join(DATASET_CONF_DIR,
+            with open(conf_path_new, "w", encoding="utf-8") as f:
                                         f'{self.dataset_conf}_custom.py')
            with open(conf_path_new, 'w', encoding='utf-8') as f:
                f.write(content)
    def _init_request_conf(self):
-        conf_path = os.path.join(REQUEST_CONF_DIR, f'{self.request_conf}.py')
+        conf_path = os.path.join(REQUEST_CONF_DIR, f"{self.request_conf}.py")
-        with open(conf_path, 'r', encoding='utf-8') as f:
+        with open(conf_path, encoding="utf-8") as f:
            content = f.read()
-        content = re.sub(r'model=.*', f'model="{self.model}",', content)
+        content = re.sub(r"model=.*", f'model="{self.model}",', content)
-        content = re.sub(r'host_port.*', f'host_port = {self.port},', content)
+        content = re.sub(r"host_port.*", f"host_port = {self.port},", content)
-        content = re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content)
+        content = re.sub(r"host_ip.*", f'host_ip = "{self.host_ip}",', content)
-        content = re.sub(r'max_out_len.*',
+        content = re.sub(r"max_out_len.*", f"max_out_len = {self.max_out_len},", content)
-                         f'max_out_len = {self.max_out_len},', content)
+        content = re.sub(r"batch_size.*", f"batch_size = {self.batch_size},", content)
-        content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},',
+        content = re.sub(r"trust_remote_code=.*", f"trust_remote_code={self.trust_remote_code},", content)
                         content)
        content = re.sub(r'trust_remote_code=.*',
                         f'trust_remote_code={self.trust_remote_code},',
                         content)
        content = content.replace("top_k", "#top_k")
        content = content.replace("seed", "#seed")
        content = content.replace("repetition_penalty", "#repetition_penalty")
        if self.task_type == "performance":
-            content = re.sub(r'path=.*', f'path="{self.model_path}",', content)
+            content = re.sub(r"path=.*", f'path="{self.model_path}",', content)
-            content = re.sub(r'request_rate.*',
+            content = re.sub(r"request_rate.*", f"request_rate = {self.request_rate},", content)
-                             f'request_rate = {self.request_rate},', content)
+            content = re.sub(r"temperature.*", "temperature = 0,\n            ignore_eos = True,", content)
            content = re.sub(
                r"temperature.*",
                "temperature = 0,\n            ignore_eos = True,", content)
            content = content.replace("top_p", "#top_p")
        if self.task_type == "accuracy":
-            content = re.sub(
+            content = re.sub(r"temperature.*", "temperature = 0.6,\n            ignore_eos = False,", content)
                r"temperature.*",
                "temperature = 0.6,\n            ignore_eos = False,", content)
        if self.temperature:
-            content = re.sub(r"temperature.*",
+            content = re.sub(r"temperature.*", f"temperature = {self.temperature},", content)
                             f"temperature = {self.temperature},", content)
        if self.top_p:
            content = re.sub(r"#?top_p.*", f"top_p = {self.top_p},", content)
        if self.top_k:
@@ -175,12 +147,9 @@ class AisbenchRunner:
        if self.seed:
            content = re.sub(r"#seed.*", f"seed = {self.seed},", content)
        if self.repetition_penalty:
-            content = re.sub(
+            content = re.sub(r"#repetition_penalty.*", f"repetition_penalty = {self.repetition_penalty},", content)
-                r"#repetition_penalty.*",
+        conf_path_new = os.path.join(REQUEST_CONF_DIR, f"{self.request_conf}_custom.py")
-                f"repetition_penalty = {self.repetition_penalty},", content)
+        with open(conf_path_new, "w", encoding="utf-8") as f:
        conf_path_new = os.path.join(REQUEST_CONF_DIR,
                                     f'{self.request_conf}_custom.py')
        with open(conf_path_new, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"The request config is\n {content}")
@@ -200,8 +169,7 @@ class AisbenchRunner:
            line = self.proc.stdout.readline().strip()
            print(line)
            if "Current exp folder: " in line:
-                self.exp_folder = re.search(r'Current exp folder: (.*)',
+                self.exp_folder = re.search(r"Current exp folder: (.*)", line).group(1)
                                            line).group(1)
                return
            if "ERROR" in line:
                error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}"
@@ -221,53 +189,48 @@ class AisbenchRunner:
                raise RuntimeError(error_msg) from None
    def _get_result_performance(self):
-        result_dir = re.search(r'Performance Result files locate in (.*)',
+        result_dir = re.search(r"Performance Result files locate in (.*)", self.result_line).group(1)[:-1]
-                               self.result_line).group(1)[:-1]
+        dataset_type = self.dataset_conf.split("/")[0]
-        dataset_type = self.dataset_conf.split('/')[0]
+        result_csv_file = os.path.join(result_dir, f"{dataset_type}dataset.csv")
-        result_csv_file = os.path.join(result_dir,
+        result_json_file = os.path.join(result_dir, f"{dataset_type}dataset.json")
                                       f"{dataset_type}dataset.csv")
        result_json_file = os.path.join(result_dir,
                                        f"{dataset_type}dataset.json")
        self.result_csv = pd.read_csv(result_csv_file, index_col=0)
        print("Getting performance results from file: ", result_json_file)
-        with open(result_json_file, 'r', encoding='utf-8') as f:
+        with open(result_json_file, encoding="utf-8") as f:
            self.result_json = json.load(f)
        self.result = [self.result_csv, self.result_json]
    def _get_result_accuracy(self):
-        acc_file = re.search(r'write csv to (.*)', self.result_line).group(1)
+        acc_file = re.search(r"write csv to (.*)", self.result_line).group(1)
        df = pd.read_csv(acc_file)
        self.result = float(df.loc[0][-1])
    def _performance_verify(self):
        self._get_result_performance()
-        output_throughput = self.result_json["Output Token Throughput"][
+        output_throughput = self.result_json["Output Token Throughput"]["total"].replace("token/s", "")
-            "total"].replace("token/s", "")
+        assert float(output_throughput) >= self.threshold * self.baseline, (
-        assert float(
+            "Performance verification failed. "
-            output_throughput
+            f"The current Output Token Throughput is {output_throughput} token/s, "
-        ) >= self.threshold * self.baseline, f"Performance verification failed. The current Output Token Throughput is {output_throughput} token/s, which is not greater than or equal to {self.threshold} * baseline {self.baseline}."
+            f"which is not greater than or equal to {self.threshold} * baseline {self.baseline}."
        )
    def _accuracy_verify(self):
        self._get_result_accuracy()
        acc_value = self.result
-        assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}."
+        assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, (
            "Accuracy verification failed. "
            f"The accuracy of {self.dataset_path} is {acc_value}, "
            f"which is not within {self.threshold} relative to baseline {self.baseline}."
        )
-def run_aisbench_cases(model,
+def run_aisbench_cases(model, port, aisbench_cases, server_args="", host_ip="localhost"):
                       port,
                       aisbench_cases,
                       server_args="",
                       host_ip="localhost"):
    aisbench_results = []
    aisbench_errors = []
    for aisbench_case in aisbench_cases:
        if not aisbench_case:
            continue
        try:
-            with AisbenchRunner(model=model,
+            with AisbenchRunner(model=model, port=port, host_ip=host_ip, aisbench_config=aisbench_case) as aisbench:
                                port=port,
                                host_ip=host_ip,
                                aisbench_config=aisbench_case) as aisbench:
                aisbench_results.append(aisbench.result)
        except Exception as e:
            aisbench_results.append("")
@@ -299,8 +262,7 @@ def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None):
    # add hash to avoid conflict with old users' lock files
    lock_file_name = hash_name + model_name + ".lock"
    # mode 0o666 is required for the filelock to be shared across users
-    lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name),
+    lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
                             mode=0o666)
    return lock
--- a/tools/check_python_src_init.py
+++ b/tools/check_python_src_init.py
@@ -36,8 +36,8 @@ def check_init_file_in_package(directory):
        return False
    # If any .py file exists, we expect an __init__.py
-    if any(f.endswith('.py') for f in files):
+    if any(f.endswith(".py") for f in files):
-        init_file = os.path.join(directory, '__init__.py')
+        init_file = os.path.join(directory, "__init__.py")
        if not os.path.isfile(init_file):
            return False
    return True
@@ -62,9 +62,7 @@ def main():
        all_missing.update(missing)
    if all_missing:
-        print(
+        print("❌ Missing '__init__.py' files in the following Python package directories:")
            "❌ Missing '__init__.py' files in the following Python package directories:"
        )
        for pkg in sorted(all_missing):
            print(f" - {pkg}")
        sys.exit(1)
--- a/tools/enforce_regex_import.py
+++ b/tools/enforce_regex_import.py
@@ -24,39 +24,33 @@ from pathlib import Path
 import regex as re
-FORBIDDEN_PATTERNS = re.compile(
+FORBIDDEN_PATTERNS = re.compile(r"^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)")
    r'^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)')
 ALLOWED_PATTERNS = [
-    re.compile(r'^\s*import\s+regex\s+as\s+re\s*$'),
+    re.compile(r"^\s*import\s+regex\s+as\s+re\s*$"),
-    re.compile(r'^\s*import\s+regex\s*$'),
+    re.compile(r"^\s*import\s+regex\s*$"),
 ]
 def get_staged_python_files() -> list[str]:
    try:
        result = subprocess.run(
-            ['git', 'diff', '--cached', '--name-only', '--diff-filter=AM'],
+            ["git", "diff", "--cached", "--name-only", "--diff-filter=AM"], capture_output=True, text=True, check=True
-            capture_output=True,
+        )
-            text=True,
+        files = result.stdout.strip().split("\n") if result.stdout.strip() else []
-            check=True)
+        return [f for f in files if f.endswith(".py")]
        files = result.stdout.strip().split(
            '\n') if result.stdout.strip() else []
        return [f for f in files if f.endswith('.py')]
    except subprocess.CalledProcessError:
        return []
 def is_forbidden_import(line: str) -> bool:
    line = line.strip()
-    return bool(
+    return bool(FORBIDDEN_PATTERNS.match(line) and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS))
        FORBIDDEN_PATTERNS.match(line)
        and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS))
 def check_file(filepath: str) -> list[tuple[int, str]]:
    violations = []
    try:
-        with open(filepath, encoding='utf-8') as f:
+        with open(filepath, encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                if is_forbidden_import(line):
                    violations.append((line_num, line.strip()))
@@ -89,9 +83,7 @@ def main() -> int:
    if total_violations > 0:
        print(f"\n💡 Found {total_violations} violation(s).")
        print("❌ Please replace 'import re' with 'import regex as re'")
-        print(
+        print("   Also replace 'from re import ...' with 'from regex import ...'")  # noqa: E501
            "   Also replace 'from re import ...' with 'from regex import ...'"
        )  # noqa: E501
        print("✅ Allowed imports:")
        print("   - import regex as re")
        print("   - import regex")  # noqa: E501
--- a/tools/format_contributors.py
+++ b/tools/format_contributors.py
@@ -20,9 +20,7 @@ import re
 import sys
 from datetime import datetime
-p = re.compile(
+p = re.compile(r"@(?P<user>[A-Za-z0-9-_]+)[^\`]*\`(?P<sha>[0-9a-fA-F]+)\`\s*[-–—]\s*(?P<date>.+)$")
    r'@(?P<user>[A-Za-z0-9-_]+)[^\`]*\`(?P<sha>[0-9a-fA-F]+)\`\s*[-–—]\s*(?P<date>.+)$'
 )
 def parse_lines(lines):
@@ -34,9 +32,9 @@ def parse_lines(lines):
        m = p.search(ln)
        if not m:
            continue
-        user = m.group('user')
+        user = m.group("user")
-        sha = m.group('sha')
+        sha = m.group("sha")
-        datestr = m.group('date').strip()
+        datestr = m.group("date").strip()
        try:
            dt = datetime.fromisoformat(datestr)
        except Exception:
@@ -51,27 +49,17 @@ def parse_lines(lines):
 def main():
    ap = argparse.ArgumentParser(
-        description=
+        description="Format and sort contributor lines by date (newest first). Outputs markdown table by default."
        "Format and sort contributor lines by date (newest first). Outputs markdown table by default."
    )
    ap.add_argument(
-        'file',
+        "file", nargs="?", help="input file (default stdin), output from collect_user_first_contribution.sh"
        nargs='?',
        help=
        'input file (default stdin), output from collect_user_first_contribution.sh'
    )
-    ap.add_argument(
+    ap.add_argument("--start", type=int, default=1, help="minimum number for table (oldest row will have this number)")
-        '--start',
+    ap.add_argument("--repo", default="vllm-project/vllm-ascend", help="repo used for commit links")
        type=int,
        default=1,
        help='minimum number for table (oldest row will have this number)')
    ap.add_argument('--repo',
                    default='vllm-project/vllm-ascend',
                    help='repo used for commit links')
    args = ap.parse_args()
    if args.file:
-        with open(args.file, 'r', encoding='utf-8') as f:
+        with open(args.file, encoding="utf-8") as f:
            lines = f.readlines()
    else:
        lines = sys.stdin.readlines()
@@ -88,9 +76,9 @@ def main():
    for dt, user, sha, datestr in items:
        short = sha[:7]
        date_short = dt.strftime("%Y/%m/%d")
-        print(
+        user_url = f"https://github.com/{user}"
-            f"| {n} | [@{user}](https://github.com/{user}) | {date_short} | [{short}](https://github.com/{args.repo}/commit/{sha}) |"
+        commit_url = f"https://github.com/{args.repo}/commit/{sha}"
-        )
+        print(f"| {n} | [@{user}]({user_url}) | {date_short} | [{short}]({commit_url}) |")
        n -= 1
--- a/tools/send_mm_request.py
+++ b/tools/send_mm_request.py
@@ -4,39 +4,30 @@ import os
 import requests
 from modelscope import snapshot_download  # type: ignore
-mm_dir = snapshot_download("vllm-ascend/mm_request", repo_type='dataset')
+mm_dir = snapshot_download("vllm-ascend/mm_request", repo_type="dataset")
 image_path = os.path.join(mm_dir, "test_mm2.jpg")
-with open(image_path, 'rb') as image_file:
+with open(image_path, "rb") as image_file:
-    image_data = base64.b64encode(image_file.read()).decode('utf-8')
+    image_data = base64.b64encode(image_file.read()).decode("utf-8")
 data = {
-    "messages": [{
+    "messages": [
-        "role":
+        {
-        "user",
+            "role": "user",
-        "content": [{
+            "content": [
-            "type": "text",
+                {"type": "text", "text": "What is the content of this image?"},
-            "text": "What is the content of this image?"
+                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
-        }, {
+            ],
-            "type": "image_url",
+        }
-            "image_url": {
+    ],
                "url": f"data:image/jpeg;base64,{image_data}"
            }
        }]
    }],
    "eos_token_id": [1, 106],
-    "pad_token_id":
+    "pad_token_id": 0,
-    0,
+    "top_k": 64,
-    "top_k":
+    "top_p": 0.95,
-    64,
+    "max_tokens": 8192,
-    "top_p":
+    "stream": False,
    0.95,
    "max_tokens":
    8192,
    "stream":
    False
 }
-headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
+headers = {"Accept": "application/json", "Content-Type": "application/json"}
 def send_image_request(model, server):
--- a/tools/send_request.py
+++ b/tools/send_request.py
@@ -20,10 +20,12 @@ def send_v1_completions(prompt, model, server, request_args=None):
 def send_v1_chat_completions(prompt, model, server, request_args=None):
    data: dict[str, Any] = {
        "model": model,
-        "messages": [{
+        "messages": [
-            "role": "user",
+            {
-            "content": prompt,
+                "role": "user",
-        }],
+                "content": prompt,
            }
        ],
    }
    if request_args:
        data.update(request_args)
--- a/tools/vllm_bench.py
+++ b/tools/vllm_bench.py
@@ -24,42 +24,58 @@ from .aisbench import maybe_download_from_modelscope
 class VllmbenchRunner:
    def _run_vllm_bench_task(self):
        vllm_bench_cmd = [
-            'vllm', 'bench', 'serve', '--backend', 'openai-chat',
+            "vllm",
-            '--trust-remote-code', '--served-model-name',
+            "bench",
-            str(self.model_name), '--model', self.model_path, '--tokenizer',
+            "serve",
-            self.model_path, '--metric-percentiles', '50,90,99', '--host',
+            "--backend",
-            self.host_ip, '--port',
+            "openai-chat",
-            str(self.port), '--save-result', '--result-filename',
+            "--trust-remote-code",
-            self.result_filename, '--endpoint', '/v1/chat/completions',
+            "--served-model-name",
-            '--ready-check-timeout-sec', '0'
+            str(self.model_name),
            "--model",
            self.model_path,
            "--tokenizer",
            self.model_path,
            "--metric-percentiles",
            "50,90,99",
            "--host",
            self.host_ip,
            "--port",
            str(self.port),
            "--save-result",
            "--result-filename",
            self.result_filename,
            "--endpoint",
            "/v1/chat/completions",
            "--ready-check-timeout-sec",
            "0",
        ]
        self._concat_config_args(vllm_bench_cmd)
        print(f"running vllm_bench cmd: {' '.join(vllm_bench_cmd)}")
-        self.proc: subprocess.Popen = subprocess.Popen(vllm_bench_cmd,
+        self.proc: subprocess.Popen = subprocess.Popen(
-                                                       stdout=subprocess.PIPE,
+            vllm_bench_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
-                                                       stderr=subprocess.PIPE,
+        )
                                                       text=True)
-    def __init__(self,
+    def __init__(
-                 model_name: str,
+        self,
-                 port: int,
+        model_name: str,
-                 config: dict,
+        port: int,
-                 baseline: float,
+        config: dict,
-                 threshold: float = 0.97,
+        baseline: float,
-                 model_path: str = "",
+        threshold: float = 0.97,
-                 host_ip: str = "localhost"):
+        model_path: str = "",
        host_ip: str = "localhost",
    ):
        self.model_name = model_name
        self.model_path = model_path
        if not self.model_path:
            self.model_path = maybe_download_from_modelscope(model_name)
-        assert self.model_path is not None, \
+        assert self.model_path is not None, f"Failed to download model: model={self.model_path}"
            f"Failed to download model: model={self.model_path}"
        self.port = port
        self.host_ip = host_ip
-        curr_time = datetime.now().strftime('%Y%m%d%H%M%S')
+        curr_time = datetime.now().strftime("%Y%m%d%H%M%S")
        self.result_filename = f"result_vllm_bench_{curr_time}.json"
        self.config = config
        self.baseline = baseline
@@ -96,19 +112,14 @@ class VllmbenchRunner:
        stdout, stderr = self.proc.communicate()
        if self.proc.returncode != 0:
-            logging.error(
+            logging.error(f"vllm bench command failed, return code: {self.proc.returncode}")
                f"vllm bench command failed, return code: {self.proc.returncode}"
            )
            logging.error(f"Standard output: {stdout}")
            logging.error(f"Standard error: {stderr}")
-            raise RuntimeError(
+            raise RuntimeError(f"vllm bench command execution failed: {stderr}")
                f"vllm bench command execution failed: {stderr}")
-        logging.info(
+        logging.info(f"vllm bench command completed, return code: {self.proc.returncode}")
            f"vllm bench command completed, return code: {self.proc.returncode}"
        )
        if stdout:
-            lines = stdout.split('\n')
+            lines = stdout.split("\n")
            last_lines = lines[-100:] if len(lines) > 100 else lines
            logging.info(f"Last {len(last_lines)} lines of standard output:")
            for line in last_lines:
@@ -119,36 +130,28 @@ class VllmbenchRunner:
    def _get_result(self):
        result_file = os.path.join(os.getcwd(), self.result_filename)
        print("Getting performance results from file: ", result_file)
-        with open(result_file, 'r', encoding='utf-8') as f:
+        with open(result_file, encoding="utf-8") as f:
            self.result = json.load(f)
    def _performance_verify(self):
        self._get_result()
        output_throughput = self.result["output_throughput"]
-        assert float(
+        assert float(output_throughput) >= self.baseline * self.threshold, (
-            output_throughput
+            "Performance verification failed. "
-        ) >= self.baseline * self.threshold, f"Performance verification failed. The current Output Token Throughput is {output_throughput} token/s, which is not greater than or equal to {self.threshold} * baseline {self.baseline}."
+            f"The current Output Token Throughput is {output_throughput} token/s, "
            f"which is not greater than or equal to {self.threshold} * baseline {self.baseline}."
        )
-def run_vllm_bench_case(model_name,
+def run_vllm_bench_case(model_name, port, config, baseline, threshold=0.97, model_path="", host_ip="localhost"):
                        port,
                        config,
                        baseline,
                        threshold=0.97,
                        model_path="",
                        host_ip="localhost"):
    try:
-        with VllmbenchRunner(model_name,
+        with VllmbenchRunner(
-                             port,
+            model_name, port, config, baseline, threshold, model_path=model_path, host_ip=host_ip
-                             config,
+        ) as vllm_bench:
                             baseline,
                             threshold,
                             model_path=model_path,
                             host_ip=host_ip) as vllm_bench:
            vllm_bench_result = vllm_bench.result
    except Exception as e:
        print(e)
        error_msg = f"vllm_bench run failed, reason is {e}"
        logging.error(error_msg)
-        assert False, f"vllm_bench run failed, reason is {e}"
+        raise RuntimeError(error_msg) from e
    return vllm_bench_result