diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f7dd0e1d..8778d540 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,31 +6,12 @@ default_stages: - manual # Run in CI exclude: 'examples/.*' # Exclude examples from all hooks by default repos: -- repo: https://github.com/codespell-project/codespell - rev: v2.4.1 - hooks: - - id: codespell - args: [ - --toml, pyproject.toml, - '--skip', 'csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml', - '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND,tbe' - ] - additional_dependencies: - - tomli -- repo: https://github.com/google/yapf - rev: v0.43.0 - hooks: - - id: yapf - args: [--in-place, --verbose] - # Keep the same list from yapfignore here to avoid yapf failing without any inputs - exclude: '(.github|benchmarks|examples|docs)/.*' - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.7 + rev: v0.14.0 hooks: - - id: ruff + - id: ruff-check args: [--output-format, github, --fix] - id: ruff-format - files: ^(benchmarks|examples)/.* - repo: https://github.com/crate-ci/typos rev: v1.32.0 hooks: @@ -39,10 +20,6 @@ repos: "--force-exclude", "--exclude", "csrc/**" ] -- repo: https://github.com/PyCQA/isort - rev: 6.0.1 - hooks: - - id: isort # - repo: https://github.com/pre-commit/mirrors-clang-format # rev: v20.1.3 # hooks: diff --git a/benchmarks/ops/ben_vocabparallelembedding.py b/benchmarks/ops/ben_vocabparallelembedding.py index 5590c733..b1bb8db0 100644 --- a/benchmarks/ops/ben_vocabparallelembedding.py +++ b/benchmarks/ops/ben_vocabparallelembedding.py @@ -1,5 +1,3 @@ -from typing import Tuple - import numpy as np import pytest import torch @@ -47,20 +45,12 @@ def get_masked_input_and_mask_ref( num_org_vocab_padding: int, added_vocab_start_index: int, added_vocab_end_index: int, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: """Reference 
implementation for verification""" org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index) - added_vocab_mask = (input_ >= added_vocab_start_index) & ( - input_ < added_vocab_end_index - ) - added_offset = ( - added_vocab_start_index - - (org_vocab_end_index - org_vocab_start_index) - - num_org_vocab_padding - ) - valid_offset = (org_vocab_start_index * org_vocab_mask) + ( - added_offset * added_vocab_mask - ) + added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index) + added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding + valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask) vocab_mask = org_vocab_mask | added_vocab_mask masked_input = vocab_mask * (input_ - valid_offset) return masked_input, ~vocab_mask @@ -78,7 +68,7 @@ SEEDS = [0] @pytest.mark.parametrize("seed", SEEDS) @torch.inference_mode() def test_get_masked_input_and_mask( - shape: Tuple[int, ...], + shape: tuple[int, ...], dtype: torch.dtype, device: str, seed: int, diff --git a/benchmarks/scripts/convert_json_to_markdown.py b/benchmarks/scripts/convert_json_to_markdown.py index 11204342..f91fbe9e 100644 --- a/benchmarks/scripts/convert_json_to_markdown.py +++ b/benchmarks/scripts/convert_json_to_markdown.py @@ -59,9 +59,7 @@ def results_to_json(latency, throughput, serving): if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Process the results of the benchmark tests." - ) + parser = argparse.ArgumentParser(description="Process the results of the benchmark tests.") parser.add_argument( "--results_folder", type=str, @@ -80,12 +78,8 @@ if __name__ == "__main__": default="./perf_result_template.md", help="The template file for the markdown report.", ) - parser.add_argument( - "--tag", default="main", help="Tag to be used for release message." 
- ) - parser.add_argument( - "--commit_id", default="", help="Commit ID to be used for release message." - ) + parser.add_argument("--tag", default="main", help="Tag to be used for release message.") + parser.add_argument("--commit_id", default="", help="Commit ID to be used for release message.") args = parser.parse_args() results_folder = (CUR_PATH / args.results_folder).resolve() @@ -116,9 +110,7 @@ if __name__ == "__main__": # get different percentiles for perc in [10, 25, 50, 75, 90, 99]: # Multiply 1000 to convert the time unit from s to ms - raw_result.update( - {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]} - ) + raw_result.update({f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}) raw_result["avg_latency"] = raw_result["avg_latency"] * 1000 # add the result to raw_result @@ -142,38 +134,24 @@ if __name__ == "__main__": serving_results = pd.DataFrame.from_dict(serving_results) throughput_results = pd.DataFrame.from_dict(throughput_results) - raw_results_json = results_to_json( - latency_results, throughput_results, serving_results - ) + raw_results_json = results_to_json(latency_results, throughput_results, serving_results) # remapping the key, for visualization purpose if not latency_results.empty: - latency_results = latency_results[list(latency_column_mapping.keys())].rename( - columns=latency_column_mapping - ) + latency_results = latency_results[list(latency_column_mapping.keys())].rename(columns=latency_column_mapping) if not serving_results.empty: - serving_results = serving_results[list(serving_column_mapping.keys())].rename( - columns=serving_column_mapping - ) + serving_results = serving_results[list(serving_column_mapping.keys())].rename(columns=serving_column_mapping) if not throughput_results.empty: - throughput_results = throughput_results[ - list(throughput_results_column_mapping.keys()) - ].rename(columns=throughput_results_column_mapping) + throughput_results = 
throughput_results[list(throughput_results_column_mapping.keys())].rename( + columns=throughput_results_column_mapping + ) - processed_results_json = results_to_json( - latency_results, throughput_results, serving_results - ) + processed_results_json = results_to_json(latency_results, throughput_results, serving_results) # get markdown tables - latency_md_table = tabulate( - latency_results, headers="keys", tablefmt="pipe", showindex=False - ) - serving_md_table = tabulate( - serving_results, headers="keys", tablefmt="pipe", showindex=False - ) - throughput_md_table = tabulate( - throughput_results, headers="keys", tablefmt="pipe", showindex=False - ) + latency_md_table = tabulate(latency_results, headers="keys", tablefmt="pipe", showindex=False) + serving_md_table = tabulate(serving_results, headers="keys", tablefmt="pipe", showindex=False) + throughput_md_table = tabulate(throughput_results, headers="keys", tablefmt="pipe", showindex=False) # document the result print(output_folder) diff --git a/collect_env.py b/collect_env.py index 68d97a7b..cdfdcbe6 100644 --- a/collect_env.py +++ b/collect_env.py @@ -27,33 +27,35 @@ from vllm.envs import environment_variables try: import torch + TORCH_AVAILABLE = True except (ImportError, NameError, AttributeError, OSError): TORCH_AVAILABLE = False # System Environment Information SystemEnv = namedtuple( - 'SystemEnv', + "SystemEnv", [ - 'torch_version', - 'is_debug_build', - 'gcc_version', - 'clang_version', - 'cmake_version', - 'os', - 'libc_version', - 'python_version', - 'python_platform', - 'pip_version', # 'pip' or 'pip3' - 'pip_packages', - 'conda_packages', - 'cpu_info', - 'vllm_version', # vllm specific field - 'vllm_ascend_version', # vllm ascend specific field - 'env_vars', - 'npu_info', # ascend specific field - 'cann_info', # ascend specific field - ]) + "torch_version", + "is_debug_build", + "gcc_version", + "clang_version", + "cmake_version", + "os", + "libc_version", + "python_version", + "python_platform", + 
"pip_version", # 'pip' or 'pip3' + "pip_packages", + "conda_packages", + "cpu_info", + "vllm_version", # vllm specific field + "vllm_ascend_version", # vllm ascend specific field + "env_vars", + "npu_info", # ascend specific field + "cann_info", # ascend specific field + ], +) DEFAULT_CONDA_PATTERNS = { "torch", @@ -82,15 +84,12 @@ DEFAULT_PIP_PATTERNS = { def run(command): """Return (return-code, stdout, stderr).""" - shell = True if type(command) is str else False - p = subprocess.Popen(command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=shell) + shell = isinstance(command, str) + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell) raw_output, raw_err = p.communicate() rc = p.returncode - if get_platform() == 'win32': - enc = 'oem' + if get_platform() == "win32": + enc = "oem" else: enc = locale.getpreferredencoding() output = raw_output.decode(enc) @@ -122,42 +121,40 @@ def run_and_return_first_line(run_lambda, command): rc, out, _ = run_lambda(command) if rc != 0: return None - return out.split('\n')[0] + return out.split("\n")[0] def get_conda_packages(run_lambda, patterns=None): if patterns is None: patterns = DEFAULT_CONDA_PATTERNS - conda = os.environ.get('CONDA_EXE', 'conda') + conda = os.environ.get("CONDA_EXE", "conda") out = run_and_read_all(run_lambda, "{} list".format(conda)) if out is None: return out - return "\n".join(line for line in out.splitlines() - if not line.startswith("#") and any(name in line - for name in patterns)) + return "\n".join( + line for line in out.splitlines() if not line.startswith("#") and any(name in line for name in patterns) + ) def get_gcc_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)") def get_clang_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'clang --version', - r'clang version (.*)') + return 
run_and_parse_first_match(run_lambda, "clang --version", r"clang version (.*)") def get_cmake_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'cmake --version', - r'cmake (.*)') + return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)") def _parse_version(version, version_tuple): version_str = version_tuple[-1] - if isinstance(version_str, str) and version_str.startswith('g'): - if '.' in version_str: - git_sha = version_str.split('.')[0][1:] - date = version_str.split('.')[-1][1:] + if isinstance(version_str, str) and version_str.startswith("g"): + if "." in version_str: + git_sha = version_str.split(".")[0][1:] + date = version_str.split(".")[-1][1:] return f"{version} (git sha: {git_sha}, date: {date})" else: git_sha = version_str[1:] # type: ignore @@ -167,26 +164,28 @@ def _parse_version(version, version_tuple): def get_vllm_version(): from vllm import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) def get_vllm_ascend_version(): from vllm_ascend._version import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) def get_cpu_info(run_lambda): - rc, out, err = 0, '', '' - if get_platform() == 'linux': - rc, out, err = run_lambda('lscpu') - elif get_platform() == 'win32': + rc, out, err = 0, "", "" + if get_platform() == "linux": + rc, out, err = run_lambda("lscpu") + elif get_platform() == "win32": rc, out, err = run_lambda( - 'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ - CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE' + "wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ + CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE" ) - elif get_platform() == 'darwin': + elif get_platform() == "darwin": rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string") - cpu_info = 'None' + cpu_info = "None" if rc == 0: cpu_info = out else: @@ -195,67 
+194,63 @@ def get_cpu_info(run_lambda): def get_platform(): - if sys.platform.startswith('linux'): - return 'linux' - elif sys.platform.startswith('win32'): - return 'win32' - elif sys.platform.startswith('cygwin'): - return 'cygwin' - elif sys.platform.startswith('darwin'): - return 'darwin' + if sys.platform.startswith("linux"): + return "linux" + elif sys.platform.startswith("win32"): + return "win32" + elif sys.platform.startswith("cygwin"): + return "cygwin" + elif sys.platform.startswith("darwin"): + return "darwin" else: return sys.platform def get_mac_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', - r'(.*)') + return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)") def get_windows_version(run_lambda): - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - return run_and_read_all( - run_lambda, - '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) + system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") + wmic_cmd = os.path.join(system_root, "System32", "Wbem", "wmic") + findstr_cmd = os.path.join(system_root, "System32", "findstr") + return run_and_read_all(run_lambda, "{} os get Caption | {} /v Caption".format(wmic_cmd, findstr_cmd)) def get_lsb_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'lsb_release -a', - r'Description:\t(.*)') + return run_and_parse_first_match(run_lambda, "lsb_release -a", r"Description:\t(.*)") def check_release_file(run_lambda): - return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', - r'PRETTY_NAME="(.*)"') + return run_and_parse_first_match(run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"') def get_os(run_lambda): from platform import machine + platform = get_platform() - if platform == 'win32' or platform == 'cygwin': + if platform == "win32" or platform == 
"cygwin": return get_windows_version(run_lambda) - if platform == 'darwin': + if platform == "darwin": version = get_mac_version(run_lambda) if version is None: return None - return 'macOS {} ({})'.format(version, machine()) + return "macOS {} ({})".format(version, machine()) - if platform == 'linux': + if platform == "linux": # Ubuntu/Debian based desc = get_lsb_version(run_lambda) if desc is not None: - return '{} ({})'.format(desc, machine()) + return "{} ({})".format(desc, machine()) # Try reading /etc/*-release desc = check_release_file(run_lambda) if desc is not None: - return '{} ({})'.format(desc, machine()) + return "{} ({})".format(desc, machine()) - return '{} ({})'.format(platform, machine()) + return "{} ({})".format(platform, machine()) # Unknown platform return platform @@ -263,14 +258,16 @@ def get_os(run_lambda): def get_python_platform(): import platform + return platform.platform() def get_libc_version(): import platform - if get_platform() != 'linux': - return 'N/A' - return '-'.join(platform.libc_ver()) + + if get_platform() != "linux": + return "N/A" + return "-".join(platform.libc_ver()) def get_pip_packages(run_lambda, patterns=None): @@ -282,31 +279,29 @@ def get_pip_packages(run_lambda, patterns=None): # But here it is invoked as `python -mpip` def run_with_pip(pip): out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"]) - return "\n".join(line for line in out.splitlines() - if any(name in line for name in patterns)) + return "\n".join(line for line in out.splitlines() if any(name in line for name in patterns)) - pip_version = 'pip3' if sys.version[0] == '3' else 'pip' - out = run_with_pip([sys.executable, '-mpip']) + pip_version = "pip3" if sys.version[0] == "3" else "pip" + out = run_with_pip([sys.executable, "-mpip"]) return pip_version, out def get_npu_info(run_lambda): - return run_and_read_all(run_lambda, 'npu-smi info') + return run_and_read_all(run_lambda, "npu-smi info") def get_cann_info(run_lambda): - out = 
run_and_read_all(run_lambda, 'lscpu | grep Architecture:') + out = run_and_read_all(run_lambda, "lscpu | grep Architecture:") cpu_arch = str(out).split()[-1] return run_and_read_all( - run_lambda, - 'cat /usr/local/Ascend/ascend-toolkit/latest/{}-linux/ascend_toolkit_install.info' - .format(cpu_arch)) + run_lambda, "cat /usr/local/Ascend/ascend-toolkit/latest/{}-linux/ascend_toolkit_install.info".format(cpu_arch) + ) def get_env_vars(): - env_vars = '' - secret_terms = ('secret', 'token', 'api', 'access', 'password') + env_vars = "" + secret_terms = ("secret", "token", "api", "access", "password") report_prefix = ("TORCH", "PYTORCH", "ASCEND_", "ATB_") for k, v in os.environ.items(): if any(term in k.lower() for term in secret_terms): @@ -327,7 +322,7 @@ def get_env_info(): version_str = torch.__version__ debug_mode_str = str(torch.version.debug) else: - version_str = debug_mode_str = 'N/A' + version_str = debug_mode_str = "N/A" sys_version = sys.version.replace("\n", " ") @@ -336,9 +331,7 @@ def get_env_info(): return SystemEnv( torch_version=version_str, is_debug_build=debug_mode_str, - python_version='{} ({}-bit runtime)'.format( - sys_version, - sys.maxsize.bit_length() + 1), + python_version="{} ({}-bit runtime)".format(sys_version, sys.maxsize.bit_length() + 1), python_platform=get_python_platform(), pip_version=pip_version, pip_packages=pip_list_output, @@ -399,36 +392,35 @@ CANN: def pretty_str(envinfo): - - def replace_nones(dct, replacement='Could not collect'): - for key in dct.keys(): + def replace_nones(dct, replacement="Could not collect"): + for key in dct: if dct[key] is not None: continue dct[key] = replacement return dct - def replace_bools(dct, true='Yes', false='No'): - for key in dct.keys(): + def replace_bools(dct, true="Yes", false="No"): + for key in dct: if dct[key] is True: dct[key] = true elif dct[key] is False: dct[key] = false return dct - def prepend(text, tag='[prepend]'): - lines = text.split('\n') + def prepend(text, 
tag="[prepend]"): + lines = text.split("\n") updated_lines = [tag + line for line in lines] - return '\n'.join(updated_lines) + return "\n".join(updated_lines) - def replace_if_empty(text, replacement='No relevant packages'): + def replace_if_empty(text, replacement="No relevant packages"): if text is not None and len(text) == 0: return replacement return text def maybe_start_on_next_line(string): # If `string` is multiline, prepend a \n to it. - if string is not None and len(string.split('\n')) > 1: - return '\n{}\n'.format(string) + if string is not None and len(string.split("\n")) > 1: + return "\n{}\n".format(string) return string mutable_dict = envinfo._asdict() @@ -440,22 +432,18 @@ def pretty_str(envinfo): mutable_dict = replace_nones(mutable_dict) # If either of these are '', replace with 'No relevant packages' - mutable_dict['pip_packages'] = replace_if_empty( - mutable_dict['pip_packages']) - mutable_dict['conda_packages'] = replace_if_empty( - mutable_dict['conda_packages']) + mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"]) + mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"]) # Tag conda and pip packages with a prefix # If they were previously None, they'll show up as ie '[conda] Could not collect' - if mutable_dict['pip_packages']: - mutable_dict['pip_packages'] = prepend( - mutable_dict['pip_packages'], '[{}] '.format(envinfo.pip_version)) - if mutable_dict['conda_packages']: - mutable_dict['conda_packages'] = prepend( - mutable_dict['conda_packages'], '[conda] ') - mutable_dict['cpu_info'] = envinfo.cpu_info - mutable_dict['npu_info'] = envinfo.npu_info - mutable_dict['cann_info'] = envinfo.cann_info + if mutable_dict["pip_packages"]: + mutable_dict["pip_packages"] = prepend(mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version)) + if mutable_dict["conda_packages"]: + mutable_dict["conda_packages"] = prepend(mutable_dict["conda_packages"], "[conda] ") + mutable_dict["cpu_info"] = 
envinfo.cpu_info + mutable_dict["npu_info"] = envinfo.npu_info + mutable_dict["cann_info"] = envinfo.cann_info return env_info_fmt.format(**mutable_dict) @@ -468,22 +456,19 @@ def main(): output = get_pretty_env_info() print(output) - if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr( - torch.utils, '_crash_handler'): + if TORCH_AVAILABLE and hasattr(torch, "utils") and hasattr(torch.utils, "_crash_handler"): minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR if sys.platform == "linux" and os.path.exists(minidump_dir): - dumps = [ - os.path.join(minidump_dir, dump) - for dump in os.listdir(minidump_dir) - ] + dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] latest = max(dumps, key=os.path.getctime) ctime = os.path.getctime(latest) - creation_time = datetime.datetime.fromtimestamp(ctime).strftime( - '%Y-%m-%d %H:%M:%S') - msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ - "if this is related to your bug please include it when you file a report ***" + creation_time = datetime.datetime.fromtimestamp(ctime).strftime("%Y-%m-%d %H:%M:%S") + msg = ( + "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + + "if this is related to your bug please include it when you file a report ***" + ) print(msg, file=sys.stderr) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/docs/source/conf.py b/docs/source/conf.py index 529815ff..3f774e65 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,12 +31,12 @@ import os # -- Project information ----------------------------------------------------- -project = 'vllm-ascend' -copyright = '2025, vllm-ascend team' -author = 'the vllm-ascend team' +project = "vllm-ascend" +copyright = "2025, vllm-ascend team" +author = "the vllm-ascend team" # The full version, including alpha/beta/rc tags -release = '' +release = "" # -- General configuration 
--------------------------------------------------- @@ -65,46 +65,46 @@ myst_substitutions = { # the branch of vllm, used in vllm clone # - main branch: 'main' # - vX.Y.Z branch: 'vX.Y.Z' - 'vllm_version': 'v0.13.0', + "vllm_version": "v0.13.0", # the branch of vllm-ascend, used in vllm-ascend clone and image tag # - main branch: 'main' # - vX.Y.Z branch: latest vllm-ascend release tag - 'vllm_ascend_version': 'v0.13.0rc1', + "vllm_ascend_version": "v0.13.0rc1", # the newest release version of vllm-ascend and matched vLLM, used in pip install. # This value should be updated when cut down release. - 'pip_vllm_ascend_version': "0.13.0rc1", - 'pip_vllm_version': "0.13.0", + "pip_vllm_ascend_version": "0.13.0rc1", + "pip_vllm_version": "0.13.0", # CANN image tag - 'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11", + "cann_image_tag": "8.3.rc2-910b-ubuntu22.04-py3.11", # vllm version in ci - 'ci_vllm_version': 'v0.13.0', + "ci_vllm_version": "v0.13.0", } # For cross-file header anchors myst_heading_anchors = 5 # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -locale_dirs = ['locale/'] -gettext_compact = False +locale_dirs = ["locale/"] +gettext_compact = False # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = [ - '_build', - 'Thumbs.db', - '.DS_Store', - '.venv', - 'README.md', - 'user_guide/release.template.md', + "_build", + "Thumbs.db", + ".DS_Store", + ".venv", + "README.md", + "user_guide/release.template.md", # TODO(yikun): Remove this after zh supported - '**/*.zh.md' + "**/*.zh.md", ] # -- Options for HTML output ------------------------------------------------- @@ -113,24 +113,23 @@ exclude_patterns = [ # a list of builtin themes. # html_title = project -html_theme = 'sphinx_book_theme' -html_logo = 'logos/vllm-ascend-logo-text-light.png' +html_theme = "sphinx_book_theme" +html_logo = "logos/vllm-ascend-logo-text-light.png" html_theme_options = { - 'path_to_docs': 'docs/source', - 'repository_url': 'https://github.com/vllm-project/vllm-ascend', - 'use_repository_button': True, - 'use_edit_page_button': True, + "path_to_docs": "docs/source", + "repository_url": "https://github.com/vllm-project/vllm-ascend", + "use_repository_button": True, + "use_edit_page_button": True, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
# html_static_path = ['_static'] -READTHEDOCS_VERSION_TYPE = os.environ.get('READTHEDOCS_VERSION_TYPE') +READTHEDOCS_VERSION_TYPE = os.environ.get("READTHEDOCS_VERSION_TYPE") if READTHEDOCS_VERSION_TYPE == "tag": # remove the warning banner if the version is a tagged release - header_file = os.path.join(os.path.dirname(__file__), - "_templates/sections/header.html") + header_file = os.path.join(os.path.dirname(__file__), "_templates/sections/header.html") # The file might be removed already if the build is triggered multiple times # (readthedocs build both HTML and PDF versions separately) if os.path.exists(header_file): diff --git a/pyproject.toml b/pyproject.toml index f2e122a2..df08c191 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,3 +42,51 @@ plugins.md033.enabled = false # inline-html plugins.md046.enabled = false # code-block-style plugins.md024.allow_different_nesting = true # no-duplicate-headers plugins.md029.enabled = false # ol-prefix + +[tool.ruff] +# TODO: according to PEP8, there should be 79 characters per line +line-length = 120 +# Folders excluded from ruff for now (to be migrated later) +exclude = [ + "examples/**", + "tests/**", + "vllm_ascend/**", +] + +[tool.ruff.lint] +select = [ + # pycodestyle + "E", + # Pyflakes + "F", + # pyupgrade + "UP", + # flake8-bugbear + "B", + # flake8-simplify + "SIM", + # isort + "I", + # flake8-logging-format + "G", +] +ignore = [ + # star imports + "F405", "F403", + # lambda expression assignment + "E731", + # zip without `strict=` + "B905", + # Loop control variable not used within loop body + "B007", + # f-string format + "UP032", + # TODO: FIX ME + "G004", + "B904", + "SIM108", + "SIM102" +] + +[tool.ruff.format] +docstring-code-format = true diff --git a/setup.py b/setup.py index a38d9563..3449282e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ import os import subprocess import sys from sysconfig import get_paths -from typing import Dict, List from setuptools import Command, Extension, find_packages, setup from 
setuptools.command.build_ext import build_ext @@ -45,15 +44,13 @@ ROOT_DIR = os.path.dirname(__file__) logger = logging.getLogger(__name__) -def check_or_set_default_env(cmake_args, - env_name, - env_variable, - default_path=""): +def check_or_set_default_env(cmake_args, env_name, env_variable, default_path=""): if env_variable is None: logging.warning( f"No {env_name} found in your environment, pleause try to set {env_name} " "if you customize the installation path of this library, otherwise default " - "path will be adapted during build this project") + "path will be adapted during build this project" + ) logging.warning(f"Set default {env_name}: {default_path}") env_variable = default_path else: @@ -65,25 +62,27 @@ def check_or_set_default_env(cmake_args, return cmake_args -def get_value_from_lines(lines: List[str], key: str) -> str: +def get_value_from_lines(lines: list[str], key: str) -> str: for line in lines: - line = ' '.join(line.split()) + line = " ".join(line.split()) if key in line: - return line.split(':')[-1].strip() + return line.split(":")[-1].strip() return "" def get_chip_type() -> str: try: - npu_info_lines = subprocess.check_output( - ['npu-smi', 'info', '-l']).decode().strip().split('\n') - npu_id = int(get_value_from_lines(npu_info_lines, 'NPU ID')) - chip_info_lines = subprocess.check_output( - ['npu-smi', 'info', '-t', 'board', '-i', - str(npu_id), '-c', '0']).decode().strip().split('\n') - chip_name = get_value_from_lines(chip_info_lines, 'Chip Name') - chip_type = get_value_from_lines(chip_info_lines, 'Chip Type') - npu_name = get_value_from_lines(chip_info_lines, 'NPU Name') + npu_info_lines = subprocess.check_output(["npu-smi", "info", "-l"]).decode().strip().split("\n") + npu_id = int(get_value_from_lines(npu_info_lines, "NPU ID")) + chip_info_lines = ( + subprocess.check_output(["npu-smi", "info", "-t", "board", "-i", str(npu_id), "-c", "0"]) + .decode() + .strip() + .split("\n") + ) + chip_name = get_value_from_lines(chip_info_lines, 
"Chip Name") + chip_type = get_value_from_lines(chip_info_lines, "Chip Type") + npu_name = get_value_from_lines(chip_info_lines, "NPU Name") if "310" in chip_name: # 310P case @@ -97,12 +96,10 @@ def get_chip_type() -> str: else: # A3 case assert npu_name - return (chip_name + '_' + npu_name).lower() + return (chip_name + "_" + npu_name).lower() else: # TODO(zzzzwwjj): Currently, A5's chip name has not determined yet. - raise ValueError( - f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION" - ) + raise ValueError(f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION") except subprocess.CalledProcessError as e: raise RuntimeError(f"Get chip info failed: {e}") except FileNotFoundError: @@ -112,8 +109,7 @@ def get_chip_type() -> str: return "" -envs = load_module_from_path("envs", - os.path.join(ROOT_DIR, "vllm_ascend", "envs.py")) +envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_ascend", "envs.py")) soc_version = get_chip_type() @@ -126,10 +122,8 @@ if not envs.SOC_VERSION: ) envs.SOC_VERSION = soc_version else: - if soc_version and envs.SOC_VERSION != soc_version: - logging.warning( - f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}" - ) + if soc_version and soc_version != envs.SOC_VERSION: + logging.warning(f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}") def gen_build_info(): @@ -167,30 +161,24 @@ def gen_build_info(): package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py") with open(package_dir, "w+") as f: - f.write('# Auto-generated file\n') + f.write("# Auto-generated file\n") f.write(f"__device_type__ = '{device_type}'\n") logging.info(f"Generated _build_info.py with SOC version: {soc_version}") class CMakeExtension(Extension): - - def __init__(self, - name: str, - cmake_lists_dir: str = ".", - **kwargs) -> None: + def __init__(self, name: str, cmake_lists_dir: str = ".", 
**kwargs) -> None: super().__init__(name, sources=[], py_limited_api=False, **kwargs) self.cmake_lists_dir = os.path.abspath(cmake_lists_dir) class custom_develop(develop): - def run(self): gen_build_info() super().run() class custom_build_info(build_py): - def run(self): gen_build_info() super().run() @@ -209,8 +197,7 @@ class build_and_install_aclnn(Command): def run(self): try: print("Running bash build_aclnn.sh ...") - subprocess.check_call( - ["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION]) + subprocess.check_call(["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION]) print("buid_aclnn.sh executed successfully!") except subprocess.CalledProcessError as e: print(f"Error running build_aclnn.sh: {e}") @@ -219,7 +206,7 @@ class build_and_install_aclnn(Command): class cmake_build_ext(build_ext): # A dict of extension directories that have been configured. - did_config: Dict[str, bool] = {} + did_config: dict[str, bool] = {} # # Determine number of compilation jobs @@ -254,9 +241,9 @@ class cmake_build_ext(build_ext): # Default use release mode to compile the csrc code # Turbo now support compiled with Release, Debug and RelWithDebugInfo if envs.CMAKE_BUILD_TYPE is None or envs.CMAKE_BUILD_TYPE not in [ - "Debug", - "Release", - "RelWithDebugInfo", + "Debug", + "Release", + "RelWithDebugInfo", ]: envs.CMAKE_BUILD_TYPE = "Release" cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"] @@ -278,20 +265,18 @@ class cmake_build_ext(build_ext): ) # find PYTHON_EXECUTABLE - check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE", - sys.executable) + check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE", sys.executable) # find PYTHON_INCLUDE_PATH - check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH", - get_paths()["include"]) + check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH", get_paths()["include"]) # ccache and ninja can not be applied at ascendc kernels now try: # if pybind11 is installed via pip - pybind11_cmake_path = 
(subprocess.check_output( - [python_executable, "-m", "pybind11", - "--cmakedir"]).decode().strip()) + pybind11_cmake_path = ( + subprocess.check_output([python_executable, "-m", "pybind11", "--cmakedir"]).decode().strip() + ) except subprocess.CalledProcessError as e: # else specify pybind11 path installed from source code on CI container raise RuntimeError(f"CMake configuration failed: {e}") @@ -309,8 +294,7 @@ class cmake_build_ext(build_ext): "910c": "ascend910_9392", "310p": "ascend310p1", } - CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION, - envs.SOC_VERSION) + CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION, envs.SOC_VERSION) cmake_args += [f"-DSOC_VERSION={CANN_SOC_VERSION}"] # Override the base directory for FetchContent downloads to $ROOT/.deps @@ -323,8 +307,7 @@ class cmake_build_ext(build_ext): torch_npu_command = "python3 -m pip show torch-npu | grep '^Location:' | awk '{print $2}'" try: - torch_npu_path = subprocess.check_output( - torch_npu_command, shell=True).decode().strip() + torch_npu_path = subprocess.check_output(torch_npu_command, shell=True).decode().strip() torch_npu_path += "/torch_npu" except subprocess.CalledProcessError as e: raise RuntimeError(f"Retrieve torch version version failed: {e}") @@ -399,22 +382,21 @@ class cmake_build_ext(build_ext): # copy back to build folder for editable build if isinstance(self.distribution.get_command_obj("develop"), develop): import shutil + for root, _, files in os.walk(self.build_temp): for file in files: if file.endswith(".so"): src_path = os.path.join(root, file) - dst_path = os.path.join(self.build_lib, "vllm_ascend", - file) + dst_path = os.path.join(self.build_lib, "vllm_ascend", file) shutil.copy(src_path, dst_path) print(f"Copy: {src_path} -> {dst_path}") # copy back _cann_ops_custom directory - src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend", - "_cann_ops_custom") - dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend", - "_cann_ops_custom") + 
src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend", "_cann_ops_custom") + dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend", "_cann_ops_custom") if os.path.exists(src_cann_ops_custom): import shutil + if os.path.exists(dst_cann_ops_custom): shutil.rmtree(dst_cann_ops_custom) shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom) @@ -428,7 +410,6 @@ class cmake_build_ext(build_ext): class custom_install(install): - def run(self): self.run_command("build_ext") install.run(self) @@ -459,10 +440,10 @@ def read_readme() -> str: return "" -def get_requirements() -> List[str]: +def get_requirements() -> list[str]: """Get Python package dependencies from requirements.txt.""" - def _read_requirements(filename: str) -> List[str]: + def _read_requirements(filename: str) -> list[str]: with open(get_path(filename)) as f: requirements = f.read().strip().split("\n") resolved_requirements = [] @@ -487,7 +468,7 @@ cmdclass = { "build_py": custom_build_info, "build_aclnn": build_and_install_aclnn, "build_ext": cmake_build_ext, - "install": custom_install + "install": custom_install, } setup( @@ -526,7 +507,7 @@ setup( "vllm.general_plugins": [ "ascend_kv_connector = vllm_ascend:register_connector", "ascend_model_loader = vllm_ascend:register_model_loader", - "ascend_service_profiling = vllm_ascend:register_service_profiling" + "ascend_service_profiling = vllm_ascend:register_service_profiling", ], }, ) diff --git a/tools/aisbench.py b/tools/aisbench.py index a4ddb0ad..dc22dded 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -29,60 +29,47 @@ import pandas as pd from modelscope import snapshot_download # type: ignore BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath("./benchmark")) -DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", - "configs", "datasets") -REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", - "configs", "models", "vllm_api") +DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, 
"ais_bench", "benchmark", "configs", "datasets") +REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", "configs", "models", "vllm_api") DATASET_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "datasets") class AisbenchRunner: - RESULT_MSG = { - "performance": "Performance Result files locate in ", - "accuracy": "write csv to " - } - DATASET_RENAME = { - "aime2024": "aime", - "gsm8k-lite": "gsm8k", - "textvqa-lite": "textvqa" - } + RESULT_MSG = {"performance": "Performance Result files locate in ", "accuracy": "write csv to "} + DATASET_RENAME = {"aime2024": "aime", "gsm8k-lite": "gsm8k", "textvqa-lite": "textvqa"} def _run_aisbench_task(self): - dataset_conf = self.dataset_conf.split('/')[-1] + dataset_conf = self.dataset_conf.split("/")[-1] if self.task_type == "accuracy": - aisbench_cmd = [ - 'ais_bench', '--models', f'{self.request_conf}_custom', - '--datasets', f'{dataset_conf}' - ] + aisbench_cmd = ["ais_bench", "--models", f"{self.request_conf}_custom", "--datasets", f"{dataset_conf}"] if self.task_type == "performance": aisbench_cmd = [ - 'ais_bench', '--models', f'{self.request_conf}_custom', - '--datasets', f'{dataset_conf}_custom', '--mode', 'perf' + "ais_bench", + "--models", + f"{self.request_conf}_custom", + "--datasets", + f"{dataset_conf}_custom", + "--mode", + "perf", ] if self.num_prompts: - aisbench_cmd.extend(['--num-prompts', str(self.num_prompts)]) + aisbench_cmd.extend(["--num-prompts", str(self.num_prompts)]) print(f"running aisbench cmd: {' '.join(aisbench_cmd)}") - self.proc: subprocess.Popen = subprocess.Popen(aisbench_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + self.proc: subprocess.Popen = subprocess.Popen( + aisbench_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) - def __init__(self, - model: str, - port: int, - aisbench_config: dict, - host_ip: str = "localhost", - verify=True): + def __init__(self, model: str, port: int, aisbench_config: dict, host_ip: str = 
"localhost", verify=True): self.model = model self.dataset_path = aisbench_config.get("dataset_path_local") if not self.dataset_path: - self.dataset_path = maybe_download_from_modelscope( - aisbench_config["dataset_path"], repo_type="dataset") + self.dataset_path = maybe_download_from_modelscope(aisbench_config["dataset_path"], repo_type="dataset") self.model_path = aisbench_config.get("model_path") if not self.model_path: self.model_path = maybe_download_from_modelscope(model) - assert self.dataset_path is not None and self.model_path is not None, \ + assert self.dataset_path is not None and self.model_path is not None, ( f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}" + ) self.port = port self.host_ip = host_ip self.task_type = aisbench_config["case_type"] @@ -92,8 +79,7 @@ class AisbenchRunner: self.max_out_len = aisbench_config["max_out_len"] self.batch_size = aisbench_config["batch_size"] self.request_rate = aisbench_config.get("request_rate", 0) - self.trust_remote_code = aisbench_config.get("trust_remote_code", - False) + self.trust_remote_code = aisbench_config.get("trust_remote_code", False) self.temperature = aisbench_config.get("temperature") self.top_k = aisbench_config.get("top_k") self.top_p = aisbench_config.get("top_p") @@ -122,52 +108,38 @@ class AisbenchRunner: command = ["cp", "-r", self.dataset_path, dst_dir] subprocess.call(command) if self.task_type == "performance": - conf_path = os.path.join(DATASET_CONF_DIR, - f'{self.dataset_conf}.py') + conf_path = os.path.join(DATASET_CONF_DIR, f"{self.dataset_conf}.py") if self.dataset_conf.startswith("textvqa"): - self.dataset_path = os.path.join(self.dataset_path, - "textvqa_val.jsonl") - with open(conf_path, 'r', encoding='utf-8') as f: + self.dataset_path = os.path.join(self.dataset_path, "textvqa_val.jsonl") + with open(conf_path, encoding="utf-8") as f: content = f.read() - content = re.sub(r'path=.*', f'path="{self.dataset_path}",', - content) - 
conf_path_new = os.path.join(DATASET_CONF_DIR, - f'{self.dataset_conf}_custom.py') - with open(conf_path_new, 'w', encoding='utf-8') as f: + content = re.sub(r"path=.*", f'path="{self.dataset_path}",', content) + conf_path_new = os.path.join(DATASET_CONF_DIR, f"{self.dataset_conf}_custom.py") + with open(conf_path_new, "w", encoding="utf-8") as f: f.write(content) def _init_request_conf(self): - conf_path = os.path.join(REQUEST_CONF_DIR, f'{self.request_conf}.py') - with open(conf_path, 'r', encoding='utf-8') as f: + conf_path = os.path.join(REQUEST_CONF_DIR, f"{self.request_conf}.py") + with open(conf_path, encoding="utf-8") as f: content = f.read() - content = re.sub(r'model=.*', f'model="{self.model}",', content) - content = re.sub(r'host_port.*', f'host_port = {self.port},', content) - content = re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content) - content = re.sub(r'max_out_len.*', - f'max_out_len = {self.max_out_len},', content) - content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},', - content) - content = re.sub(r'trust_remote_code=.*', - f'trust_remote_code={self.trust_remote_code},', - content) + content = re.sub(r"model=.*", f'model="{self.model}",', content) + content = re.sub(r"host_port.*", f"host_port = {self.port},", content) + content = re.sub(r"host_ip.*", f'host_ip = "{self.host_ip}",', content) + content = re.sub(r"max_out_len.*", f"max_out_len = {self.max_out_len},", content) + content = re.sub(r"batch_size.*", f"batch_size = {self.batch_size},", content) + content = re.sub(r"trust_remote_code=.*", f"trust_remote_code={self.trust_remote_code},", content) content = content.replace("top_k", "#top_k") content = content.replace("seed", "#seed") content = content.replace("repetition_penalty", "#repetition_penalty") if self.task_type == "performance": - content = re.sub(r'path=.*', f'path="{self.model_path}",', content) - content = re.sub(r'request_rate.*', - f'request_rate = {self.request_rate},', content) - content = 
re.sub( - r"temperature.*", - "temperature = 0,\n ignore_eos = True,", content) + content = re.sub(r"path=.*", f'path="{self.model_path}",', content) + content = re.sub(r"request_rate.*", f"request_rate = {self.request_rate},", content) + content = re.sub(r"temperature.*", "temperature = 0,\n ignore_eos = True,", content) content = content.replace("top_p", "#top_p") if self.task_type == "accuracy": - content = re.sub( - r"temperature.*", - "temperature = 0.6,\n ignore_eos = False,", content) + content = re.sub(r"temperature.*", "temperature = 0.6,\n ignore_eos = False,", content) if self.temperature: - content = re.sub(r"temperature.*", - f"temperature = {self.temperature},", content) + content = re.sub(r"temperature.*", f"temperature = {self.temperature},", content) if self.top_p: content = re.sub(r"#?top_p.*", f"top_p = {self.top_p},", content) if self.top_k: @@ -175,12 +147,9 @@ class AisbenchRunner: if self.seed: content = re.sub(r"#seed.*", f"seed = {self.seed},", content) if self.repetition_penalty: - content = re.sub( - r"#repetition_penalty.*", - f"repetition_penalty = {self.repetition_penalty},", content) - conf_path_new = os.path.join(REQUEST_CONF_DIR, - f'{self.request_conf}_custom.py') - with open(conf_path_new, 'w', encoding='utf-8') as f: + content = re.sub(r"#repetition_penalty.*", f"repetition_penalty = {self.repetition_penalty},", content) + conf_path_new = os.path.join(REQUEST_CONF_DIR, f"{self.request_conf}_custom.py") + with open(conf_path_new, "w", encoding="utf-8") as f: f.write(content) print(f"The request config is\n {content}") @@ -200,8 +169,7 @@ class AisbenchRunner: line = self.proc.stdout.readline().strip() print(line) if "Current exp folder: " in line: - self.exp_folder = re.search(r'Current exp folder: (.*)', - line).group(1) + self.exp_folder = re.search(r"Current exp folder: (.*)", line).group(1) return if "ERROR" in line: error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}" @@ -221,53 +189,48 @@ class 
AisbenchRunner: raise RuntimeError(error_msg) from None def _get_result_performance(self): - result_dir = re.search(r'Performance Result files locate in (.*)', - self.result_line).group(1)[:-1] - dataset_type = self.dataset_conf.split('/')[0] - result_csv_file = os.path.join(result_dir, - f"{dataset_type}dataset.csv") - result_json_file = os.path.join(result_dir, - f"{dataset_type}dataset.json") + result_dir = re.search(r"Performance Result files locate in (.*)", self.result_line).group(1)[:-1] + dataset_type = self.dataset_conf.split("/")[0] + result_csv_file = os.path.join(result_dir, f"{dataset_type}dataset.csv") + result_json_file = os.path.join(result_dir, f"{dataset_type}dataset.json") self.result_csv = pd.read_csv(result_csv_file, index_col=0) print("Getting performance results from file: ", result_json_file) - with open(result_json_file, 'r', encoding='utf-8') as f: + with open(result_json_file, encoding="utf-8") as f: self.result_json = json.load(f) self.result = [self.result_csv, self.result_json] def _get_result_accuracy(self): - acc_file = re.search(r'write csv to (.*)', self.result_line).group(1) + acc_file = re.search(r"write csv to (.*)", self.result_line).group(1) df = pd.read_csv(acc_file) self.result = float(df.loc[0][-1]) def _performance_verify(self): self._get_result_performance() - output_throughput = self.result_json["Output Token Throughput"][ - "total"].replace("token/s", "") - assert float( - output_throughput - ) >= self.threshold * self.baseline, f"Performance verification failed. The current Output Token Throughput is {output_throughput} token/s, which is not greater than or equal to {self.threshold} * baseline {self.baseline}." + output_throughput = self.result_json["Output Token Throughput"]["total"].replace("token/s", "") + assert float(output_throughput) >= self.threshold * self.baseline, ( + "Performance verification failed. 
" + f"The current Output Token Throughput is {output_throughput} token/s, " + f"which is not greater than or equal to {self.threshold} * baseline {self.baseline}." + ) def _accuracy_verify(self): self._get_result_accuracy() acc_value = self.result - assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}." + assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, ( + "Accuracy verification failed. " + f"The accuracy of {self.dataset_path} is {acc_value}, " + f"which is not within {self.threshold} relative to baseline {self.baseline}." + ) -def run_aisbench_cases(model, - port, - aisbench_cases, - server_args="", - host_ip="localhost"): +def run_aisbench_cases(model, port, aisbench_cases, server_args="", host_ip="localhost"): aisbench_results = [] aisbench_errors = [] for aisbench_case in aisbench_cases: if not aisbench_case: continue try: - with AisbenchRunner(model=model, - port=port, - host_ip=host_ip, - aisbench_config=aisbench_case) as aisbench: + with AisbenchRunner(model=model, port=port, host_ip=host_ip, aisbench_config=aisbench_case) as aisbench: aisbench_results.append(aisbench.result) except Exception as e: aisbench_results.append("") @@ -299,8 +262,7 @@ def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None): # add hash to avoid conflict with old users' lock files lock_file_name = hash_name + model_name + ".lock" # mode 0o666 is required for the filelock to be shared across users - lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), - mode=0o666) + lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666) return lock diff --git a/tools/check_python_src_init.py b/tools/check_python_src_init.py index 2c7e0a91..ab0fa21f 100644 --- a/tools/check_python_src_init.py +++ 
b/tools/check_python_src_init.py @@ -36,8 +36,8 @@ def check_init_file_in_package(directory): return False # If any .py file exists, we expect an __init__.py - if any(f.endswith('.py') for f in files): - init_file = os.path.join(directory, '__init__.py') + if any(f.endswith(".py") for f in files): + init_file = os.path.join(directory, "__init__.py") if not os.path.isfile(init_file): return False return True @@ -62,9 +62,7 @@ def main(): all_missing.update(missing) if all_missing: - print( - "āŒ Missing '__init__.py' files in the following Python package directories:" - ) + print("āŒ Missing '__init__.py' files in the following Python package directories:") for pkg in sorted(all_missing): print(f" - {pkg}") sys.exit(1) diff --git a/tools/enforce_regex_import.py b/tools/enforce_regex_import.py index 92e6f79e..896e1ad0 100644 --- a/tools/enforce_regex_import.py +++ b/tools/enforce_regex_import.py @@ -24,39 +24,33 @@ from pathlib import Path import regex as re -FORBIDDEN_PATTERNS = re.compile( - r'^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)') +FORBIDDEN_PATTERNS = re.compile(r"^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)") ALLOWED_PATTERNS = [ - re.compile(r'^\s*import\s+regex\s+as\s+re\s*$'), - re.compile(r'^\s*import\s+regex\s*$'), + re.compile(r"^\s*import\s+regex\s+as\s+re\s*$"), + re.compile(r"^\s*import\s+regex\s*$"), ] def get_staged_python_files() -> list[str]: try: result = subprocess.run( - ['git', 'diff', '--cached', '--name-only', '--diff-filter=AM'], - capture_output=True, - text=True, - check=True) - files = result.stdout.strip().split( - '\n') if result.stdout.strip() else [] - return [f for f in files if f.endswith('.py')] + ["git", "diff", "--cached", "--name-only", "--diff-filter=AM"], capture_output=True, text=True, check=True + ) + files = result.stdout.strip().split("\n") if result.stdout.strip() else [] + return [f for f in files if f.endswith(".py")] except subprocess.CalledProcessError: return [] def is_forbidden_import(line: str) -> 
bool: line = line.strip() - return bool( - FORBIDDEN_PATTERNS.match(line) - and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS)) + return bool(FORBIDDEN_PATTERNS.match(line) and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS)) def check_file(filepath: str) -> list[tuple[int, str]]: violations = [] try: - with open(filepath, encoding='utf-8') as f: + with open(filepath, encoding="utf-8") as f: for line_num, line in enumerate(f, 1): if is_forbidden_import(line): violations.append((line_num, line.strip())) @@ -89,9 +83,7 @@ def main() -> int: if total_violations > 0: print(f"\nšŸ’” Found {total_violations} violation(s).") print("āŒ Please replace 'import re' with 'import regex as re'") - print( - " Also replace 'from re import ...' with 'from regex import ...'" - ) # noqa: E501 + print(" Also replace 'from re import ...' with 'from regex import ...'") # noqa: E501 print("āœ… Allowed imports:") print(" - import regex as re") print(" - import regex") # noqa: E501 diff --git a/tools/format_contributors.py b/tools/format_contributors.py index 84dc4d19..93d5d9e1 100644 --- a/tools/format_contributors.py +++ b/tools/format_contributors.py @@ -20,9 +20,7 @@ import re import sys from datetime import datetime -p = re.compile( - r'@(?P[A-Za-z0-9-_]+)[^\`]*\`(?P[0-9a-fA-F]+)\`\s*[-–—]\s*(?P.+)$' -) +p = re.compile(r"@(?P[A-Za-z0-9-_]+)[^\`]*\`(?P[0-9a-fA-F]+)\`\s*[-–—]\s*(?P.+)$") def parse_lines(lines): @@ -34,9 +32,9 @@ def parse_lines(lines): m = p.search(ln) if not m: continue - user = m.group('user') - sha = m.group('sha') - datestr = m.group('date').strip() + user = m.group("user") + sha = m.group("sha") + datestr = m.group("date").strip() try: dt = datetime.fromisoformat(datestr) except Exception: @@ -51,27 +49,17 @@ def parse_lines(lines): def main(): ap = argparse.ArgumentParser( - description= - "Format and sort contributor lines by date (newest first). Outputs markdown table by default." 
+ description="Format and sort contributor lines by date (newest first). Outputs markdown table by default." ) ap.add_argument( - 'file', - nargs='?', - help= - 'input file (default stdin), output from collect_user_first_contribution.sh' + "file", nargs="?", help="input file (default stdin), output from collect_user_first_contribution.sh" ) - ap.add_argument( - '--start', - type=int, - default=1, - help='minimum number for table (oldest row will have this number)') - ap.add_argument('--repo', - default='vllm-project/vllm-ascend', - help='repo used for commit links') + ap.add_argument("--start", type=int, default=1, help="minimum number for table (oldest row will have this number)") + ap.add_argument("--repo", default="vllm-project/vllm-ascend", help="repo used for commit links") args = ap.parse_args() if args.file: - with open(args.file, 'r', encoding='utf-8') as f: + with open(args.file, encoding="utf-8") as f: lines = f.readlines() else: lines = sys.stdin.readlines() @@ -88,9 +76,9 @@ def main(): for dt, user, sha, datestr in items: short = sha[:7] date_short = dt.strftime("%Y/%m/%d") - print( - f"| {n} | [@{user}](https://github.com/{user}) | {date_short} | [{short}](https://github.com/{args.repo}/commit/{sha}) |" - ) + user_url = f"https://github.com/{user}" + commit_url = f"https://github.com/{args.repo}/commit/{sha}" + print(f"| {n} | [@{user}]({user_url}) | {date_short} | [{short}]({commit_url}) |") n -= 1 diff --git a/tools/send_mm_request.py b/tools/send_mm_request.py index 61551200..8b3033ab 100644 --- a/tools/send_mm_request.py +++ b/tools/send_mm_request.py @@ -4,39 +4,30 @@ import os import requests from modelscope import snapshot_download # type: ignore -mm_dir = snapshot_download("vllm-ascend/mm_request", repo_type='dataset') +mm_dir = snapshot_download("vllm-ascend/mm_request", repo_type="dataset") image_path = os.path.join(mm_dir, "test_mm2.jpg") -with open(image_path, 'rb') as image_file: - image_data = 
base64.b64encode(image_file.read()).decode('utf-8') +with open(image_path, "rb") as image_file: + image_data = base64.b64encode(image_file.read()).decode("utf-8") data = { - "messages": [{ - "role": - "user", - "content": [{ - "type": "text", - "text": "What is the content of this image?" - }, { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{image_data}" - } - }] - }], + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is the content of this image?"}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}, + ], + } + ], "eos_token_id": [1, 106], - "pad_token_id": - 0, - "top_k": - 64, - "top_p": - 0.95, - "max_tokens": - 8192, - "stream": - False + "pad_token_id": 0, + "top_k": 64, + "top_p": 0.95, + "max_tokens": 8192, + "stream": False, } -headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} +headers = {"Accept": "application/json", "Content-Type": "application/json"} def send_image_request(model, server): diff --git a/tools/send_request.py b/tools/send_request.py index faad3156..3edc12f8 100644 --- a/tools/send_request.py +++ b/tools/send_request.py @@ -20,10 +20,12 @@ def send_v1_completions(prompt, model, server, request_args=None): def send_v1_chat_completions(prompt, model, server, request_args=None): data: dict[str, Any] = { "model": model, - "messages": [{ - "role": "user", - "content": prompt, - }], + "messages": [ + { + "role": "user", + "content": prompt, + } + ], } if request_args: data.update(request_args) diff --git a/tools/vllm_bench.py b/tools/vllm_bench.py index b460e191..eab096a6 100644 --- a/tools/vllm_bench.py +++ b/tools/vllm_bench.py @@ -24,42 +24,58 @@ from .aisbench import maybe_download_from_modelscope class VllmbenchRunner: - def _run_vllm_bench_task(self): vllm_bench_cmd = [ - 'vllm', 'bench', 'serve', '--backend', 'openai-chat', - '--trust-remote-code', '--served-model-name', - str(self.model_name), '--model', 
self.model_path, '--tokenizer', - self.model_path, '--metric-percentiles', '50,90,99', '--host', - self.host_ip, '--port', - str(self.port), '--save-result', '--result-filename', - self.result_filename, '--endpoint', '/v1/chat/completions', - '--ready-check-timeout-sec', '0' + "vllm", + "bench", + "serve", + "--backend", + "openai-chat", + "--trust-remote-code", + "--served-model-name", + str(self.model_name), + "--model", + self.model_path, + "--tokenizer", + self.model_path, + "--metric-percentiles", + "50,90,99", + "--host", + self.host_ip, + "--port", + str(self.port), + "--save-result", + "--result-filename", + self.result_filename, + "--endpoint", + "/v1/chat/completions", + "--ready-check-timeout-sec", + "0", ] self._concat_config_args(vllm_bench_cmd) print(f"running vllm_bench cmd: {' '.join(vllm_bench_cmd)}") - self.proc: subprocess.Popen = subprocess.Popen(vllm_bench_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + self.proc: subprocess.Popen = subprocess.Popen( + vllm_bench_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) - def __init__(self, - model_name: str, - port: int, - config: dict, - baseline: float, - threshold: float = 0.97, - model_path: str = "", - host_ip: str = "localhost"): + def __init__( + self, + model_name: str, + port: int, + config: dict, + baseline: float, + threshold: float = 0.97, + model_path: str = "", + host_ip: str = "localhost", + ): self.model_name = model_name self.model_path = model_path if not self.model_path: self.model_path = maybe_download_from_modelscope(model_name) - assert self.model_path is not None, \ - f"Failed to download model: model={self.model_path}" + assert self.model_path is not None, f"Failed to download model: model={self.model_path}" self.port = port self.host_ip = host_ip - curr_time = datetime.now().strftime('%Y%m%d%H%M%S') + curr_time = datetime.now().strftime("%Y%m%d%H%M%S") self.result_filename = f"result_vllm_bench_{curr_time}.json" self.config = config 
self.baseline = baseline @@ -96,19 +112,14 @@ class VllmbenchRunner: stdout, stderr = self.proc.communicate() if self.proc.returncode != 0: - logging.error( - f"vllm bench command failed, return code: {self.proc.returncode}" - ) + logging.error(f"vllm bench command failed, return code: {self.proc.returncode}") logging.error(f"Standard output: {stdout}") logging.error(f"Standard error: {stderr}") - raise RuntimeError( - f"vllm bench command execution failed: {stderr}") + raise RuntimeError(f"vllm bench command execution failed: {stderr}") - logging.info( - f"vllm bench command completed, return code: {self.proc.returncode}" - ) + logging.info(f"vllm bench command completed, return code: {self.proc.returncode}") if stdout: - lines = stdout.split('\n') + lines = stdout.split("\n") last_lines = lines[-100:] if len(lines) > 100 else lines logging.info(f"Last {len(last_lines)} lines of standard output:") for line in last_lines: @@ -119,36 +130,28 @@ class VllmbenchRunner: def _get_result(self): result_file = os.path.join(os.getcwd(), self.result_filename) print("Getting performance results from file: ", result_file) - with open(result_file, 'r', encoding='utf-8') as f: + with open(result_file, encoding="utf-8") as f: self.result = json.load(f) def _performance_verify(self): self._get_result() output_throughput = self.result["output_throughput"] - assert float( - output_throughput - ) >= self.baseline * self.threshold, f"Performance verification failed. The current Output Token Throughput is {output_throughput} token/s, which is not greater than or equal to {self.threshold} * baseline {self.baseline}." + assert float(output_throughput) >= self.baseline * self.threshold, ( + "Performance verification failed. " + f"The current Output Token Throughput is {output_throughput} token/s, " + f"which is not greater than or equal to {self.threshold} * baseline {self.baseline}." 
+ ) -def run_vllm_bench_case(model_name, - port, - config, - baseline, - threshold=0.97, - model_path="", - host_ip="localhost"): +def run_vllm_bench_case(model_name, port, config, baseline, threshold=0.97, model_path="", host_ip="localhost"): try: - with VllmbenchRunner(model_name, - port, - config, - baseline, - threshold, - model_path=model_path, - host_ip=host_ip) as vllm_bench: + with VllmbenchRunner( + model_name, port, config, baseline, threshold, model_path=model_path, host_ip=host_ip + ) as vllm_bench: vllm_bench_result = vllm_bench.result except Exception as e: print(e) error_msg = f"vllm_bench run failed, reason is {e}" logging.error(error_msg) - assert False, f"vllm_bench run failed, reason is {e}" + raise RuntimeError(error_msg) from e return vllm_bench_result