diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f7dd0e1d..8778d540 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,31 +6,12 @@ default_stages: - manual # Run in CI exclude: 'examples/.*' # Exclude examples from all hooks by default repos: -- repo: https://github.com/codespell-project/codespell - rev: v2.4.1 - hooks: - - id: codespell - args: [ - --toml, pyproject.toml, - '--skip', 'csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml', - '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND,tbe' - ] - additional_dependencies: - - tomli -- repo: https://github.com/google/yapf - rev: v0.43.0 - hooks: - - id: yapf - args: [--in-place, --verbose] - # Keep the same list from yapfignore here to avoid yapf failing without any inputs - exclude: '(.github|benchmarks|examples|docs)/.*' - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.7 + rev: v0.14.0 hooks: - - id: ruff + - id: ruff-check args: [--output-format, github, --fix] - id: ruff-format - files: ^(benchmarks|examples)/.* - repo: https://github.com/crate-ci/typos rev: v1.32.0 hooks: @@ -39,10 +20,6 @@ repos: "--force-exclude", "--exclude", "csrc/**" ] -- repo: https://github.com/PyCQA/isort - rev: 6.0.1 - hooks: - - id: isort # - repo: https://github.com/pre-commit/mirrors-clang-format # rev: v20.1.3 # hooks: diff --git a/benchmarks/ops/ben_vocabparallelembedding.py b/benchmarks/ops/ben_vocabparallelembedding.py index 5590c733..b1bb8db0 100644 --- a/benchmarks/ops/ben_vocabparallelembedding.py +++ b/benchmarks/ops/ben_vocabparallelembedding.py @@ -1,5 +1,3 @@ -from typing import Tuple - import numpy as np import pytest import torch @@ -47,20 +45,12 @@ def get_masked_input_and_mask_ref( num_org_vocab_padding: int, added_vocab_start_index: int, added_vocab_end_index: int, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: """Reference 
implementation for verification""" org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index) - added_vocab_mask = (input_ >= added_vocab_start_index) & ( - input_ < added_vocab_end_index - ) - added_offset = ( - added_vocab_start_index - - (org_vocab_end_index - org_vocab_start_index) - - num_org_vocab_padding - ) - valid_offset = (org_vocab_start_index * org_vocab_mask) + ( - added_offset * added_vocab_mask - ) + added_vocab_mask = (input_ >= added_vocab_start_index) & (input_ < added_vocab_end_index) + added_offset = added_vocab_start_index - (org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding + valid_offset = (org_vocab_start_index * org_vocab_mask) + (added_offset * added_vocab_mask) vocab_mask = org_vocab_mask | added_vocab_mask masked_input = vocab_mask * (input_ - valid_offset) return masked_input, ~vocab_mask @@ -78,7 +68,7 @@ SEEDS = [0] @pytest.mark.parametrize("seed", SEEDS) @torch.inference_mode() def test_get_masked_input_and_mask( - shape: Tuple[int, ...], + shape: tuple[int, ...], dtype: torch.dtype, device: str, seed: int, diff --git a/benchmarks/scripts/convert_json_to_markdown.py b/benchmarks/scripts/convert_json_to_markdown.py index 11204342..f91fbe9e 100644 --- a/benchmarks/scripts/convert_json_to_markdown.py +++ b/benchmarks/scripts/convert_json_to_markdown.py @@ -59,9 +59,7 @@ def results_to_json(latency, throughput, serving): if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Process the results of the benchmark tests." - ) + parser = argparse.ArgumentParser(description="Process the results of the benchmark tests.") parser.add_argument( "--results_folder", type=str, @@ -80,12 +78,8 @@ if __name__ == "__main__": default="./perf_result_template.md", help="The template file for the markdown report.", ) - parser.add_argument( - "--tag", default="main", help="Tag to be used for release message." 
- ) - parser.add_argument( - "--commit_id", default="", help="Commit ID to be used for release message." - ) + parser.add_argument("--tag", default="main", help="Tag to be used for release message.") + parser.add_argument("--commit_id", default="", help="Commit ID to be used for release message.") args = parser.parse_args() results_folder = (CUR_PATH / args.results_folder).resolve() @@ -116,9 +110,7 @@ if __name__ == "__main__": # get different percentiles for perc in [10, 25, 50, 75, 90, 99]: # Multiply 1000 to convert the time unit from s to ms - raw_result.update( - {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]} - ) + raw_result.update({f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}) raw_result["avg_latency"] = raw_result["avg_latency"] * 1000 # add the result to raw_result @@ -142,38 +134,24 @@ if __name__ == "__main__": serving_results = pd.DataFrame.from_dict(serving_results) throughput_results = pd.DataFrame.from_dict(throughput_results) - raw_results_json = results_to_json( - latency_results, throughput_results, serving_results - ) + raw_results_json = results_to_json(latency_results, throughput_results, serving_results) # remapping the key, for visualization purpose if not latency_results.empty: - latency_results = latency_results[list(latency_column_mapping.keys())].rename( - columns=latency_column_mapping - ) + latency_results = latency_results[list(latency_column_mapping.keys())].rename(columns=latency_column_mapping) if not serving_results.empty: - serving_results = serving_results[list(serving_column_mapping.keys())].rename( - columns=serving_column_mapping - ) + serving_results = serving_results[list(serving_column_mapping.keys())].rename(columns=serving_column_mapping) if not throughput_results.empty: - throughput_results = throughput_results[ - list(throughput_results_column_mapping.keys()) - ].rename(columns=throughput_results_column_mapping) + throughput_results = 
throughput_results[list(throughput_results_column_mapping.keys())].rename( + columns=throughput_results_column_mapping + ) - processed_results_json = results_to_json( - latency_results, throughput_results, serving_results - ) + processed_results_json = results_to_json(latency_results, throughput_results, serving_results) # get markdown tables - latency_md_table = tabulate( - latency_results, headers="keys", tablefmt="pipe", showindex=False - ) - serving_md_table = tabulate( - serving_results, headers="keys", tablefmt="pipe", showindex=False - ) - throughput_md_table = tabulate( - throughput_results, headers="keys", tablefmt="pipe", showindex=False - ) + latency_md_table = tabulate(latency_results, headers="keys", tablefmt="pipe", showindex=False) + serving_md_table = tabulate(serving_results, headers="keys", tablefmt="pipe", showindex=False) + throughput_md_table = tabulate(throughput_results, headers="keys", tablefmt="pipe", showindex=False) # document the result print(output_folder) diff --git a/collect_env.py b/collect_env.py index 68d97a7b..cdfdcbe6 100644 --- a/collect_env.py +++ b/collect_env.py @@ -27,33 +27,35 @@ from vllm.envs import environment_variables try: import torch + TORCH_AVAILABLE = True except (ImportError, NameError, AttributeError, OSError): TORCH_AVAILABLE = False # System Environment Information SystemEnv = namedtuple( - 'SystemEnv', + "SystemEnv", [ - 'torch_version', - 'is_debug_build', - 'gcc_version', - 'clang_version', - 'cmake_version', - 'os', - 'libc_version', - 'python_version', - 'python_platform', - 'pip_version', # 'pip' or 'pip3' - 'pip_packages', - 'conda_packages', - 'cpu_info', - 'vllm_version', # vllm specific field - 'vllm_ascend_version', # vllm ascend specific field - 'env_vars', - 'npu_info', # ascend specific field - 'cann_info', # ascend specific field - ]) + "torch_version", + "is_debug_build", + "gcc_version", + "clang_version", + "cmake_version", + "os", + "libc_version", + "python_version", + "python_platform", + 
"pip_version", # 'pip' or 'pip3' + "pip_packages", + "conda_packages", + "cpu_info", + "vllm_version", # vllm specific field + "vllm_ascend_version", # vllm ascend specific field + "env_vars", + "npu_info", # ascend specific field + "cann_info", # ascend specific field + ], +) DEFAULT_CONDA_PATTERNS = { "torch", @@ -82,15 +84,12 @@ DEFAULT_PIP_PATTERNS = { def run(command): """Return (return-code, stdout, stderr).""" - shell = True if type(command) is str else False - p = subprocess.Popen(command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=shell) + shell = isinstance(command, str) + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell) raw_output, raw_err = p.communicate() rc = p.returncode - if get_platform() == 'win32': - enc = 'oem' + if get_platform() == "win32": + enc = "oem" else: enc = locale.getpreferredencoding() output = raw_output.decode(enc) @@ -122,42 +121,40 @@ def run_and_return_first_line(run_lambda, command): rc, out, _ = run_lambda(command) if rc != 0: return None - return out.split('\n')[0] + return out.split("\n")[0] def get_conda_packages(run_lambda, patterns=None): if patterns is None: patterns = DEFAULT_CONDA_PATTERNS - conda = os.environ.get('CONDA_EXE', 'conda') + conda = os.environ.get("CONDA_EXE", "conda") out = run_and_read_all(run_lambda, "{} list".format(conda)) if out is None: return out - return "\n".join(line for line in out.splitlines() - if not line.startswith("#") and any(name in line - for name in patterns)) + return "\n".join( + line for line in out.splitlines() if not line.startswith("#") and any(name in line for name in patterns) + ) def get_gcc_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)") def get_clang_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'clang --version', - r'clang version (.*)') + return 
run_and_parse_first_match(run_lambda, "clang --version", r"clang version (.*)") def get_cmake_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'cmake --version', - r'cmake (.*)') + return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)") def _parse_version(version, version_tuple): version_str = version_tuple[-1] - if isinstance(version_str, str) and version_str.startswith('g'): - if '.' in version_str: - git_sha = version_str.split('.')[0][1:] - date = version_str.split('.')[-1][1:] + if isinstance(version_str, str) and version_str.startswith("g"): + if "." in version_str: + git_sha = version_str.split(".")[0][1:] + date = version_str.split(".")[-1][1:] return f"{version} (git sha: {git_sha}, date: {date})" else: git_sha = version_str[1:] # type: ignore @@ -167,26 +164,28 @@ def _parse_version(version, version_tuple): def get_vllm_version(): from vllm import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) def get_vllm_ascend_version(): from vllm_ascend._version import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) def get_cpu_info(run_lambda): - rc, out, err = 0, '', '' - if get_platform() == 'linux': - rc, out, err = run_lambda('lscpu') - elif get_platform() == 'win32': + rc, out, err = 0, "", "" + if get_platform() == "linux": + rc, out, err = run_lambda("lscpu") + elif get_platform() == "win32": rc, out, err = run_lambda( - 'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ - CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE' + "wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ + CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE" ) - elif get_platform() == 'darwin': + elif get_platform() == "darwin": rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string") - cpu_info = 'None' + cpu_info = "None" if rc == 0: cpu_info = out else: @@ -195,67 
+194,63 @@ def get_cpu_info(run_lambda): def get_platform(): - if sys.platform.startswith('linux'): - return 'linux' - elif sys.platform.startswith('win32'): - return 'win32' - elif sys.platform.startswith('cygwin'): - return 'cygwin' - elif sys.platform.startswith('darwin'): - return 'darwin' + if sys.platform.startswith("linux"): + return "linux" + elif sys.platform.startswith("win32"): + return "win32" + elif sys.platform.startswith("cygwin"): + return "cygwin" + elif sys.platform.startswith("darwin"): + return "darwin" else: return sys.platform def get_mac_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', - r'(.*)') + return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)") def get_windows_version(run_lambda): - system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') - findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - return run_and_read_all( - run_lambda, - '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) + system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") + wmic_cmd = os.path.join(system_root, "System32", "Wbem", "wmic") + findstr_cmd = os.path.join(system_root, "System32", "findstr") + return run_and_read_all(run_lambda, "{} os get Caption | {} /v Caption".format(wmic_cmd, findstr_cmd)) def get_lsb_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'lsb_release -a', - r'Description:\t(.*)') + return run_and_parse_first_match(run_lambda, "lsb_release -a", r"Description:\t(.*)") def check_release_file(run_lambda): - return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', - r'PRETTY_NAME="(.*)"') + return run_and_parse_first_match(run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"') def get_os(run_lambda): from platform import machine + platform = get_platform() - if platform == 'win32' or platform == 'cygwin': + if platform == "win32" or platform == 
"cygwin": return get_windows_version(run_lambda) - if platform == 'darwin': + if platform == "darwin": version = get_mac_version(run_lambda) if version is None: return None - return 'macOS {} ({})'.format(version, machine()) + return "macOS {} ({})".format(version, machine()) - if platform == 'linux': + if platform == "linux": # Ubuntu/Debian based desc = get_lsb_version(run_lambda) if desc is not None: - return '{} ({})'.format(desc, machine()) + return "{} ({})".format(desc, machine()) # Try reading /etc/*-release desc = check_release_file(run_lambda) if desc is not None: - return '{} ({})'.format(desc, machine()) + return "{} ({})".format(desc, machine()) - return '{} ({})'.format(platform, machine()) + return "{} ({})".format(platform, machine()) # Unknown platform return platform @@ -263,14 +258,16 @@ def get_os(run_lambda): def get_python_platform(): import platform + return platform.platform() def get_libc_version(): import platform - if get_platform() != 'linux': - return 'N/A' - return '-'.join(platform.libc_ver()) + + if get_platform() != "linux": + return "N/A" + return "-".join(platform.libc_ver()) def get_pip_packages(run_lambda, patterns=None): @@ -282,31 +279,29 @@ def get_pip_packages(run_lambda, patterns=None): # But here it is invoked as `python -mpip` def run_with_pip(pip): out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"]) - return "\n".join(line for line in out.splitlines() - if any(name in line for name in patterns)) + return "\n".join(line for line in out.splitlines() if any(name in line for name in patterns)) - pip_version = 'pip3' if sys.version[0] == '3' else 'pip' - out = run_with_pip([sys.executable, '-mpip']) + pip_version = "pip3" if sys.version[0] == "3" else "pip" + out = run_with_pip([sys.executable, "-mpip"]) return pip_version, out def get_npu_info(run_lambda): - return run_and_read_all(run_lambda, 'npu-smi info') + return run_and_read_all(run_lambda, "npu-smi info") def get_cann_info(run_lambda): - out = 
run_and_read_all(run_lambda, 'lscpu | grep Architecture:') + out = run_and_read_all(run_lambda, "lscpu | grep Architecture:") cpu_arch = str(out).split()[-1] return run_and_read_all( - run_lambda, - 'cat /usr/local/Ascend/ascend-toolkit/latest/{}-linux/ascend_toolkit_install.info' - .format(cpu_arch)) + run_lambda, "cat /usr/local/Ascend/ascend-toolkit/latest/{}-linux/ascend_toolkit_install.info".format(cpu_arch) + ) def get_env_vars(): - env_vars = '' - secret_terms = ('secret', 'token', 'api', 'access', 'password') + env_vars = "" + secret_terms = ("secret", "token", "api", "access", "password") report_prefix = ("TORCH", "PYTORCH", "ASCEND_", "ATB_") for k, v in os.environ.items(): if any(term in k.lower() for term in secret_terms): @@ -327,7 +322,7 @@ def get_env_info(): version_str = torch.__version__ debug_mode_str = str(torch.version.debug) else: - version_str = debug_mode_str = 'N/A' + version_str = debug_mode_str = "N/A" sys_version = sys.version.replace("\n", " ") @@ -336,9 +331,7 @@ def get_env_info(): return SystemEnv( torch_version=version_str, is_debug_build=debug_mode_str, - python_version='{} ({}-bit runtime)'.format( - sys_version, - sys.maxsize.bit_length() + 1), + python_version="{} ({}-bit runtime)".format(sys_version, sys.maxsize.bit_length() + 1), python_platform=get_python_platform(), pip_version=pip_version, pip_packages=pip_list_output, @@ -399,36 +392,35 @@ CANN: def pretty_str(envinfo): - - def replace_nones(dct, replacement='Could not collect'): - for key in dct.keys(): + def replace_nones(dct, replacement="Could not collect"): + for key in dct: if dct[key] is not None: continue dct[key] = replacement return dct - def replace_bools(dct, true='Yes', false='No'): - for key in dct.keys(): + def replace_bools(dct, true="Yes", false="No"): + for key in dct: if dct[key] is True: dct[key] = true elif dct[key] is False: dct[key] = false return dct - def prepend(text, tag='[prepend]'): - lines = text.split('\n') + def prepend(text, 
tag="[prepend]"): + lines = text.split("\n") updated_lines = [tag + line for line in lines] - return '\n'.join(updated_lines) + return "\n".join(updated_lines) - def replace_if_empty(text, replacement='No relevant packages'): + def replace_if_empty(text, replacement="No relevant packages"): if text is not None and len(text) == 0: return replacement return text def maybe_start_on_next_line(string): # If `string` is multiline, prepend a \n to it. - if string is not None and len(string.split('\n')) > 1: - return '\n{}\n'.format(string) + if string is not None and len(string.split("\n")) > 1: + return "\n{}\n".format(string) return string mutable_dict = envinfo._asdict() @@ -440,22 +432,18 @@ def pretty_str(envinfo): mutable_dict = replace_nones(mutable_dict) # If either of these are '', replace with 'No relevant packages' - mutable_dict['pip_packages'] = replace_if_empty( - mutable_dict['pip_packages']) - mutable_dict['conda_packages'] = replace_if_empty( - mutable_dict['conda_packages']) + mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"]) + mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"]) # Tag conda and pip packages with a prefix # If they were previously None, they'll show up as ie '[conda] Could not collect' - if mutable_dict['pip_packages']: - mutable_dict['pip_packages'] = prepend( - mutable_dict['pip_packages'], '[{}] '.format(envinfo.pip_version)) - if mutable_dict['conda_packages']: - mutable_dict['conda_packages'] = prepend( - mutable_dict['conda_packages'], '[conda] ') - mutable_dict['cpu_info'] = envinfo.cpu_info - mutable_dict['npu_info'] = envinfo.npu_info - mutable_dict['cann_info'] = envinfo.cann_info + if mutable_dict["pip_packages"]: + mutable_dict["pip_packages"] = prepend(mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version)) + if mutable_dict["conda_packages"]: + mutable_dict["conda_packages"] = prepend(mutable_dict["conda_packages"], "[conda] ") + mutable_dict["cpu_info"] = 
envinfo.cpu_info + mutable_dict["npu_info"] = envinfo.npu_info + mutable_dict["cann_info"] = envinfo.cann_info return env_info_fmt.format(**mutable_dict) @@ -468,22 +456,19 @@ def main(): output = get_pretty_env_info() print(output) - if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr( - torch.utils, '_crash_handler'): + if TORCH_AVAILABLE and hasattr(torch, "utils") and hasattr(torch.utils, "_crash_handler"): minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR if sys.platform == "linux" and os.path.exists(minidump_dir): - dumps = [ - os.path.join(minidump_dir, dump) - for dump in os.listdir(minidump_dir) - ] + dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] latest = max(dumps, key=os.path.getctime) ctime = os.path.getctime(latest) - creation_time = datetime.datetime.fromtimestamp(ctime).strftime( - '%Y-%m-%d %H:%M:%S') - msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ - "if this is related to your bug please include it when you file a report ***" + creation_time = datetime.datetime.fromtimestamp(ctime).strftime("%Y-%m-%d %H:%M:%S") + msg = ( + "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + + "if this is related to your bug please include it when you file a report ***" + ) print(msg, file=sys.stderr) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/docs/source/conf.py b/docs/source/conf.py index 529815ff..3f774e65 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,12 +31,12 @@ import os # -- Project information ----------------------------------------------------- -project = 'vllm-ascend' -copyright = '2025, vllm-ascend team' -author = 'the vllm-ascend team' +project = "vllm-ascend" +copyright = "2025, vllm-ascend team" +author = "the vllm-ascend team" # The full version, including alpha/beta/rc tags -release = '' +release = "" # -- General configuration 
--------------------------------------------------- @@ -65,46 +65,46 @@ myst_substitutions = { # the branch of vllm, used in vllm clone # - main branch: 'main' # - vX.Y.Z branch: 'vX.Y.Z' - 'vllm_version': 'v0.13.0', + "vllm_version": "v0.13.0", # the branch of vllm-ascend, used in vllm-ascend clone and image tag # - main branch: 'main' # - vX.Y.Z branch: latest vllm-ascend release tag - 'vllm_ascend_version': 'v0.13.0rc1', + "vllm_ascend_version": "v0.13.0rc1", # the newest release version of vllm-ascend and matched vLLM, used in pip install. # This value should be updated when cut down release. - 'pip_vllm_ascend_version': "0.13.0rc1", - 'pip_vllm_version': "0.13.0", + "pip_vllm_ascend_version": "0.13.0rc1", + "pip_vllm_version": "0.13.0", # CANN image tag - 'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11", + "cann_image_tag": "8.3.rc2-910b-ubuntu22.04-py3.11", # vllm version in ci - 'ci_vllm_version': 'v0.13.0', + "ci_vllm_version": "v0.13.0", } # For cross-file header anchors myst_heading_anchors = 5 # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -locale_dirs = ['locale/'] -gettext_compact = False +locale_dirs = ["locale/"] +gettext_compact = False # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = [ - '_build', - 'Thumbs.db', - '.DS_Store', - '.venv', - 'README.md', - 'user_guide/release.template.md', + "_build", + "Thumbs.db", + ".DS_Store", + ".venv", + "README.md", + "user_guide/release.template.md", # TODO(yikun): Remove this after zh supported - '**/*.zh.md' + "**/*.zh.md", ] # -- Options for HTML output ------------------------------------------------- @@ -113,24 +113,23 @@ exclude_patterns = [ # a list of builtin themes. # html_title = project -html_theme = 'sphinx_book_theme' -html_logo = 'logos/vllm-ascend-logo-text-light.png' +html_theme = "sphinx_book_theme" +html_logo = "logos/vllm-ascend-logo-text-light.png" html_theme_options = { - 'path_to_docs': 'docs/source', - 'repository_url': 'https://github.com/vllm-project/vllm-ascend', - 'use_repository_button': True, - 'use_edit_page_button': True, + "path_to_docs": "docs/source", + "repository_url": "https://github.com/vllm-project/vllm-ascend", + "use_repository_button": True, + "use_edit_page_button": True, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
# html_static_path = ['_static'] -READTHEDOCS_VERSION_TYPE = os.environ.get('READTHEDOCS_VERSION_TYPE') +READTHEDOCS_VERSION_TYPE = os.environ.get("READTHEDOCS_VERSION_TYPE") if READTHEDOCS_VERSION_TYPE == "tag": # remove the warning banner if the version is a tagged release - header_file = os.path.join(os.path.dirname(__file__), - "_templates/sections/header.html") + header_file = os.path.join(os.path.dirname(__file__), "_templates/sections/header.html") # The file might be removed already if the build is triggered multiple times # (readthedocs build both HTML and PDF versions separately) if os.path.exists(header_file): diff --git a/pyproject.toml b/pyproject.toml index f2e122a2..df08c191 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,3 +42,51 @@ plugins.md033.enabled = false # inline-html plugins.md046.enabled = false # code-block-style plugins.md024.allow_different_nesting = true # no-duplicate-headers plugins.md029.enabled = false # ol-prefix + +[tool.ruff] +# TODO: according to PEP8, there should be 79 characters per line +line-length = 120 +# Folders excluded from ruff for now (to be migrated later) +exclude = [ + "examples/**", + "tests/**", + "vllm_ascend/**", +] + +[tool.ruff.lint] +select = [ + # pycodestyle + "E", + # Pyflakes + "F", + # pyupgrade + "UP", + # flake8-bugbear + "B", + # flake8-simplify + "SIM", + # isort + "I", + # flake8-logging-format + "G", +] +ignore = [ + # star imports + "F405", "F403", + # lambda expression assignment + "E731", + # zip without `strict=` + "B905", + # Loop control variable not used within loop body + "B007", + # f-string format + "UP032", + # TODO: FIX ME + "G004", + "B904", + "SIM108", + "SIM102" +] + +[tool.ruff.format] +docstring-code-format = true diff --git a/setup.py b/setup.py index a38d9563..3449282e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ import os import subprocess import sys from sysconfig import get_paths -from typing import Dict, List from setuptools import Command, Extension, find_packages, setup from 
setuptools.command.build_ext import build_ext @@ -45,15 +44,13 @@ ROOT_DIR = os.path.dirname(__file__) logger = logging.getLogger(__name__) -def check_or_set_default_env(cmake_args, - env_name, - env_variable, - default_path=""): +def check_or_set_default_env(cmake_args, env_name, env_variable, default_path=""): if env_variable is None: logging.warning( f"No {env_name} found in your environment, pleause try to set {env_name} " "if you customize the installation path of this library, otherwise default " - "path will be adapted during build this project") + "path will be adapted during build this project" + ) logging.warning(f"Set default {env_name}: {default_path}") env_variable = default_path else: @@ -65,25 +62,27 @@ def check_or_set_default_env(cmake_args, return cmake_args -def get_value_from_lines(lines: List[str], key: str) -> str: +def get_value_from_lines(lines: list[str], key: str) -> str: for line in lines: - line = ' '.join(line.split()) + line = " ".join(line.split()) if key in line: - return line.split(':')[-1].strip() + return line.split(":")[-1].strip() return "" def get_chip_type() -> str: try: - npu_info_lines = subprocess.check_output( - ['npu-smi', 'info', '-l']).decode().strip().split('\n') - npu_id = int(get_value_from_lines(npu_info_lines, 'NPU ID')) - chip_info_lines = subprocess.check_output( - ['npu-smi', 'info', '-t', 'board', '-i', - str(npu_id), '-c', '0']).decode().strip().split('\n') - chip_name = get_value_from_lines(chip_info_lines, 'Chip Name') - chip_type = get_value_from_lines(chip_info_lines, 'Chip Type') - npu_name = get_value_from_lines(chip_info_lines, 'NPU Name') + npu_info_lines = subprocess.check_output(["npu-smi", "info", "-l"]).decode().strip().split("\n") + npu_id = int(get_value_from_lines(npu_info_lines, "NPU ID")) + chip_info_lines = ( + subprocess.check_output(["npu-smi", "info", "-t", "board", "-i", str(npu_id), "-c", "0"]) + .decode() + .strip() + .split("\n") + ) + chip_name = get_value_from_lines(chip_info_lines, 
"Chip Name") + chip_type = get_value_from_lines(chip_info_lines, "Chip Type") + npu_name = get_value_from_lines(chip_info_lines, "NPU Name") if "310" in chip_name: # 310P case @@ -97,12 +96,10 @@ def get_chip_type() -> str: else: # A3 case assert npu_name - return (chip_name + '_' + npu_name).lower() + return (chip_name + "_" + npu_name).lower() else: # TODO(zzzzwwjj): Currently, A5's chip name has not determined yet. - raise ValueError( - f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION" - ) + raise ValueError(f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION") except subprocess.CalledProcessError as e: raise RuntimeError(f"Get chip info failed: {e}") except FileNotFoundError: @@ -112,8 +109,7 @@ def get_chip_type() -> str: return "" -envs = load_module_from_path("envs", - os.path.join(ROOT_DIR, "vllm_ascend", "envs.py")) +envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_ascend", "envs.py")) soc_version = get_chip_type() @@ -126,10 +122,8 @@ if not envs.SOC_VERSION: ) envs.SOC_VERSION = soc_version else: - if soc_version and envs.SOC_VERSION != soc_version: - logging.warning( - f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}" - ) + if soc_version and soc_version != envs.SOC_VERSION: + logging.warning(f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}") def gen_build_info(): @@ -167,30 +161,24 @@ def gen_build_info(): package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py") with open(package_dir, "w+") as f: - f.write('# Auto-generated file\n') + f.write("# Auto-generated file\n") f.write(f"__device_type__ = '{device_type}'\n") logging.info(f"Generated _build_info.py with SOC version: {soc_version}") class CMakeExtension(Extension): - - def __init__(self, - name: str, - cmake_lists_dir: str = ".", - **kwargs) -> None: + def __init__(self, name: str, cmake_lists_dir: str = ".", 
**kwargs) -> None: super().__init__(name, sources=[], py_limited_api=False, **kwargs) self.cmake_lists_dir = os.path.abspath(cmake_lists_dir) class custom_develop(develop): - def run(self): gen_build_info() super().run() class custom_build_info(build_py): - def run(self): gen_build_info() super().run() @@ -209,8 +197,7 @@ class build_and_install_aclnn(Command): def run(self): try: print("Running bash build_aclnn.sh ...") - subprocess.check_call( - ["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION]) + subprocess.check_call(["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION]) print("buid_aclnn.sh executed successfully!") except subprocess.CalledProcessError as e: print(f"Error running build_aclnn.sh: {e}") @@ -219,7 +206,7 @@ class build_and_install_aclnn(Command): class cmake_build_ext(build_ext): # A dict of extension directories that have been configured. - did_config: Dict[str, bool] = {} + did_config: dict[str, bool] = {} # # Determine number of compilation jobs @@ -254,9 +241,9 @@ class cmake_build_ext(build_ext): # Default use release mode to compile the csrc code # Turbo now support compiled with Release, Debug and RelWithDebugInfo if envs.CMAKE_BUILD_TYPE is None or envs.CMAKE_BUILD_TYPE not in [ - "Debug", - "Release", - "RelWithDebugInfo", + "Debug", + "Release", + "RelWithDebugInfo", ]: envs.CMAKE_BUILD_TYPE = "Release" cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"] @@ -278,20 +265,18 @@ class cmake_build_ext(build_ext): ) # find PYTHON_EXECUTABLE - check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE", - sys.executable) + check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE", sys.executable) # find PYTHON_INCLUDE_PATH - check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH", - get_paths()["include"]) + check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH", get_paths()["include"]) # ccache and ninja can not be applied at ascendc kernels now try: # if pybind11 is installed via pip - pybind11_cmake_path = 
(subprocess.check_output( - [python_executable, "-m", "pybind11", - "--cmakedir"]).decode().strip()) + pybind11_cmake_path = ( + subprocess.check_output([python_executable, "-m", "pybind11", "--cmakedir"]).decode().strip() + ) except subprocess.CalledProcessError as e: # else specify pybind11 path installed from source code on CI container raise RuntimeError(f"CMake configuration failed: {e}") @@ -309,8 +294,7 @@ class cmake_build_ext(build_ext): "910c": "ascend910_9392", "310p": "ascend310p1", } - CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION, - envs.SOC_VERSION) + CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION, envs.SOC_VERSION) cmake_args += [f"-DSOC_VERSION={CANN_SOC_VERSION}"] # Override the base directory for FetchContent downloads to $ROOT/.deps @@ -323,8 +307,7 @@ class cmake_build_ext(build_ext): torch_npu_command = "python3 -m pip show torch-npu | grep '^Location:' | awk '{print $2}'" try: - torch_npu_path = subprocess.check_output( - torch_npu_command, shell=True).decode().strip() + torch_npu_path = subprocess.check_output(torch_npu_command, shell=True).decode().strip() torch_npu_path += "/torch_npu" except subprocess.CalledProcessError as e: raise RuntimeError(f"Retrieve torch version version failed: {e}") @@ -399,22 +382,21 @@ class cmake_build_ext(build_ext): # copy back to build folder for editable build if isinstance(self.distribution.get_command_obj("develop"), develop): import shutil + for root, _, files in os.walk(self.build_temp): for file in files: if file.endswith(".so"): src_path = os.path.join(root, file) - dst_path = os.path.join(self.build_lib, "vllm_ascend", - file) + dst_path = os.path.join(self.build_lib, "vllm_ascend", file) shutil.copy(src_path, dst_path) print(f"Copy: {src_path} -> {dst_path}") # copy back _cann_ops_custom directory - src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend", - "_cann_ops_custom") - dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend", - "_cann_ops_custom") + 
src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend", "_cann_ops_custom") + dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend", "_cann_ops_custom") if os.path.exists(src_cann_ops_custom): import shutil + if os.path.exists(dst_cann_ops_custom): shutil.rmtree(dst_cann_ops_custom) shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom) @@ -428,7 +410,6 @@ class cmake_build_ext(build_ext): class custom_install(install): - def run(self): self.run_command("build_ext") install.run(self) @@ -459,10 +440,10 @@ def read_readme() -> str: return "" -def get_requirements() -> List[str]: +def get_requirements() -> list[str]: """Get Python package dependencies from requirements.txt.""" - def _read_requirements(filename: str) -> List[str]: + def _read_requirements(filename: str) -> list[str]: with open(get_path(filename)) as f: requirements = f.read().strip().split("\n") resolved_requirements = [] @@ -487,7 +468,7 @@ cmdclass = { "build_py": custom_build_info, "build_aclnn": build_and_install_aclnn, "build_ext": cmake_build_ext, - "install": custom_install + "install": custom_install, } setup( @@ -526,7 +507,7 @@ setup( "vllm.general_plugins": [ "ascend_kv_connector = vllm_ascend:register_connector", "ascend_model_loader = vllm_ascend:register_model_loader", - "ascend_service_profiling = vllm_ascend:register_service_profiling" + "ascend_service_profiling = vllm_ascend:register_service_profiling", ], }, ) diff --git a/tools/aisbench.py b/tools/aisbench.py index a4ddb0ad..dc22dded 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -29,60 +29,47 @@ import pandas as pd from modelscope import snapshot_download # type: ignore BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath("./benchmark")) -DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", - "configs", "datasets") -REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", - "configs", "models", "vllm_api") +DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, 
"ais_bench", "benchmark", "configs", "datasets") +REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", "configs", "models", "vllm_api") DATASET_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "datasets") class AisbenchRunner: - RESULT_MSG = { - "performance": "Performance Result files locate in ", - "accuracy": "write csv to " - } - DATASET_RENAME = { - "aime2024": "aime", - "gsm8k-lite": "gsm8k", - "textvqa-lite": "textvqa" - } + RESULT_MSG = {"performance": "Performance Result files locate in ", "accuracy": "write csv to "} + DATASET_RENAME = {"aime2024": "aime", "gsm8k-lite": "gsm8k", "textvqa-lite": "textvqa"} def _run_aisbench_task(self): - dataset_conf = self.dataset_conf.split('/')[-1] + dataset_conf = self.dataset_conf.split("/")[-1] if self.task_type == "accuracy": - aisbench_cmd = [ - 'ais_bench', '--models', f'{self.request_conf}_custom', - '--datasets', f'{dataset_conf}' - ] + aisbench_cmd = ["ais_bench", "--models", f"{self.request_conf}_custom", "--datasets", f"{dataset_conf}"] if self.task_type == "performance": aisbench_cmd = [ - 'ais_bench', '--models', f'{self.request_conf}_custom', - '--datasets', f'{dataset_conf}_custom', '--mode', 'perf' + "ais_bench", + "--models", + f"{self.request_conf}_custom", + "--datasets", + f"{dataset_conf}_custom", + "--mode", + "perf", ] if self.num_prompts: - aisbench_cmd.extend(['--num-prompts', str(self.num_prompts)]) + aisbench_cmd.extend(["--num-prompts", str(self.num_prompts)]) print(f"running aisbench cmd: {' '.join(aisbench_cmd)}") - self.proc: subprocess.Popen = subprocess.Popen(aisbench_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + self.proc: subprocess.Popen = subprocess.Popen( + aisbench_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) - def __init__(self, - model: str, - port: int, - aisbench_config: dict, - host_ip: str = "localhost", - verify=True): + def __init__(self, model: str, port: int, aisbench_config: dict, host_ip: str = 
"localhost", verify=True): self.model = model self.dataset_path = aisbench_config.get("dataset_path_local") if not self.dataset_path: - self.dataset_path = maybe_download_from_modelscope( - aisbench_config["dataset_path"], repo_type="dataset") + self.dataset_path = maybe_download_from_modelscope(aisbench_config["dataset_path"], repo_type="dataset") self.model_path = aisbench_config.get("model_path") if not self.model_path: self.model_path = maybe_download_from_modelscope(model) - assert self.dataset_path is not None and self.model_path is not None, \ + assert self.dataset_path is not None and self.model_path is not None, ( f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}" + ) self.port = port self.host_ip = host_ip self.task_type = aisbench_config["case_type"] @@ -92,8 +79,7 @@ class AisbenchRunner: self.max_out_len = aisbench_config["max_out_len"] self.batch_size = aisbench_config["batch_size"] self.request_rate = aisbench_config.get("request_rate", 0) - self.trust_remote_code = aisbench_config.get("trust_remote_code", - False) + self.trust_remote_code = aisbench_config.get("trust_remote_code", False) self.temperature = aisbench_config.get("temperature") self.top_k = aisbench_config.get("top_k") self.top_p = aisbench_config.get("top_p") @@ -122,52 +108,38 @@ class AisbenchRunner: command = ["cp", "-r", self.dataset_path, dst_dir] subprocess.call(command) if self.task_type == "performance": - conf_path = os.path.join(DATASET_CONF_DIR, - f'{self.dataset_conf}.py') + conf_path = os.path.join(DATASET_CONF_DIR, f"{self.dataset_conf}.py") if self.dataset_conf.startswith("textvqa"): - self.dataset_path = os.path.join(self.dataset_path, - "textvqa_val.jsonl") - with open(conf_path, 'r', encoding='utf-8') as f: + self.dataset_path = os.path.join(self.dataset_path, "textvqa_val.jsonl") + with open(conf_path, encoding="utf-8") as f: content = f.read() - content = re.sub(r'path=.*', f'path="{self.dataset_path}",', - content) - 
conf_path_new = os.path.join(DATASET_CONF_DIR, - f'{self.dataset_conf}_custom.py') - with open(conf_path_new, 'w', encoding='utf-8') as f: + content = re.sub(r"path=.*", f'path="{self.dataset_path}",', content) + conf_path_new = os.path.join(DATASET_CONF_DIR, f"{self.dataset_conf}_custom.py") + with open(conf_path_new, "w", encoding="utf-8") as f: f.write(content) def _init_request_conf(self): - conf_path = os.path.join(REQUEST_CONF_DIR, f'{self.request_conf}.py') - with open(conf_path, 'r', encoding='utf-8') as f: + conf_path = os.path.join(REQUEST_CONF_DIR, f"{self.request_conf}.py") + with open(conf_path, encoding="utf-8") as f: content = f.read() - content = re.sub(r'model=.*', f'model="{self.model}",', content) - content = re.sub(r'host_port.*', f'host_port = {self.port},', content) - content = re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content) - content = re.sub(r'max_out_len.*', - f'max_out_len = {self.max_out_len},', content) - content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},', - content) - content = re.sub(r'trust_remote_code=.*', - f'trust_remote_code={self.trust_remote_code},', - content) + content = re.sub(r"model=.*", f'model="{self.model}",', content) + content = re.sub(r"host_port.*", f"host_port = {self.port},", content) + content = re.sub(r"host_ip.*", f'host_ip = "{self.host_ip}",', content) + content = re.sub(r"max_out_len.*", f"max_out_len = {self.max_out_len},", content) + content = re.sub(r"batch_size.*", f"batch_size = {self.batch_size},", content) + content = re.sub(r"trust_remote_code=.*", f"trust_remote_code={self.trust_remote_code},", content) content = content.replace("top_k", "#top_k") content = content.replace("seed", "#seed") content = content.replace("repetition_penalty", "#repetition_penalty") if self.task_type == "performance": - content = re.sub(r'path=.*', f'path="{self.model_path}",', content) - content = re.sub(r'request_rate.*', - f'request_rate = {self.request_rate},', content) - content = 
re.sub( - r"temperature.*", - "temperature = 0,\n ignore_eos = True,", content) + content = re.sub(r"path=.*", f'path="{self.model_path}",', content) + content = re.sub(r"request_rate.*", f"request_rate = {self.request_rate},", content) + content = re.sub(r"temperature.*", "temperature = 0,\n ignore_eos = True,", content) content = content.replace("top_p", "#top_p") if self.task_type == "accuracy": - content = re.sub( - r"temperature.*", - "temperature = 0.6,\n ignore_eos = False,", content) + content = re.sub(r"temperature.*", "temperature = 0.6,\n ignore_eos = False,", content) if self.temperature: - content = re.sub(r"temperature.*", - f"temperature = {self.temperature},", content) + content = re.sub(r"temperature.*", f"temperature = {self.temperature},", content) if self.top_p: content = re.sub(r"#?top_p.*", f"top_p = {self.top_p},", content) if self.top_k: @@ -175,12 +147,9 @@ class AisbenchRunner: if self.seed: content = re.sub(r"#seed.*", f"seed = {self.seed},", content) if self.repetition_penalty: - content = re.sub( - r"#repetition_penalty.*", - f"repetition_penalty = {self.repetition_penalty},", content) - conf_path_new = os.path.join(REQUEST_CONF_DIR, - f'{self.request_conf}_custom.py') - with open(conf_path_new, 'w', encoding='utf-8') as f: + content = re.sub(r"#repetition_penalty.*", f"repetition_penalty = {self.repetition_penalty},", content) + conf_path_new = os.path.join(REQUEST_CONF_DIR, f"{self.request_conf}_custom.py") + with open(conf_path_new, "w", encoding="utf-8") as f: f.write(content) print(f"The request config is\n {content}") @@ -200,8 +169,7 @@ class AisbenchRunner: line = self.proc.stdout.readline().strip() print(line) if "Current exp folder: " in line: - self.exp_folder = re.search(r'Current exp folder: (.*)', - line).group(1) + self.exp_folder = re.search(r"Current exp folder: (.*)", line).group(1) return if "ERROR" in line: error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}" @@ -221,53 +189,48 @@ class 
AisbenchRunner: raise RuntimeError(error_msg) from None def _get_result_performance(self): - result_dir = re.search(r'Performance Result files locate in (.*)', - self.result_line).group(1)[:-1] - dataset_type = self.dataset_conf.split('/')[0] - result_csv_file = os.path.join(result_dir, - f"{dataset_type}dataset.csv") - result_json_file = os.path.join(result_dir, - f"{dataset_type}dataset.json") + result_dir = re.search(r"Performance Result files locate in (.*)", self.result_line).group(1)[:-1] + dataset_type = self.dataset_conf.split("/")[0] + result_csv_file = os.path.join(result_dir, f"{dataset_type}dataset.csv") + result_json_file = os.path.join(result_dir, f"{dataset_type}dataset.json") self.result_csv = pd.read_csv(result_csv_file, index_col=0) print("Getting performance results from file: ", result_json_file) - with open(result_json_file, 'r', encoding='utf-8') as f: + with open(result_json_file, encoding="utf-8") as f: self.result_json = json.load(f) self.result = [self.result_csv, self.result_json] def _get_result_accuracy(self): - acc_file = re.search(r'write csv to (.*)', self.result_line).group(1) + acc_file = re.search(r"write csv to (.*)", self.result_line).group(1) df = pd.read_csv(acc_file) self.result = float(df.loc[0][-1]) def _performance_verify(self): self._get_result_performance() - output_throughput = self.result_json["Output Token Throughput"][ - "total"].replace("token/s", "") - assert float( - output_throughput - ) >= self.threshold * self.baseline, f"Performance verification failed. The current Output Token Throughput is {output_throughput} token/s, which is not greater than or equal to {self.threshold} * baseline {self.baseline}." + output_throughput = self.result_json["Output Token Throughput"]["total"].replace("token/s", "") + assert float(output_throughput) >= self.threshold * self.baseline, ( + "Performance verification failed. 
" + f"The current Output Token Throughput is {output_throughput} token/s, " + f"which is not greater than or equal to {self.threshold} * baseline {self.baseline}." + ) def _accuracy_verify(self): self._get_result_accuracy() acc_value = self.result - assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}." + assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, ( + "Accuracy verification failed. " + f"The accuracy of {self.dataset_path} is {acc_value}, " + f"which is not within {self.threshold} relative to baseline {self.baseline}." + ) -def run_aisbench_cases(model, - port, - aisbench_cases, - server_args="", - host_ip="localhost"): +def run_aisbench_cases(model, port, aisbench_cases, server_args="", host_ip="localhost"): aisbench_results = [] aisbench_errors = [] for aisbench_case in aisbench_cases: if not aisbench_case: continue try: - with AisbenchRunner(model=model, - port=port, - host_ip=host_ip, - aisbench_config=aisbench_case) as aisbench: + with AisbenchRunner(model=model, port=port, host_ip=host_ip, aisbench_config=aisbench_case) as aisbench: aisbench_results.append(aisbench.result) except Exception as e: aisbench_results.append("") @@ -299,8 +262,7 @@ def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None): # add hash to avoid conflict with old users' lock files lock_file_name = hash_name + model_name + ".lock" # mode 0o666 is required for the filelock to be shared across users - lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), - mode=0o666) + lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666) return lock diff --git a/tools/check_python_src_init.py b/tools/check_python_src_init.py index 2c7e0a91..ab0fa21f 100644 --- a/tools/check_python_src_init.py +++ 
b/tools/check_python_src_init.py @@ -36,8 +36,8 @@ def check_init_file_in_package(directory): return False # If any .py file exists, we expect an __init__.py - if any(f.endswith('.py') for f in files): - init_file = os.path.join(directory, '__init__.py') + if any(f.endswith(".py") for f in files): + init_file = os.path.join(directory, "__init__.py") if not os.path.isfile(init_file): return False return True @@ -62,9 +62,7 @@ def main(): all_missing.update(missing) if all_missing: - print( - "āŒ Missing '__init__.py' files in the following Python package directories:" - ) + print("āŒ Missing '__init__.py' files in the following Python package directories:") for pkg in sorted(all_missing): print(f" - {pkg}") sys.exit(1) diff --git a/tools/enforce_regex_import.py b/tools/enforce_regex_import.py index 92e6f79e..896e1ad0 100644 --- a/tools/enforce_regex_import.py +++ b/tools/enforce_regex_import.py @@ -24,39 +24,33 @@ from pathlib import Path import regex as re -FORBIDDEN_PATTERNS = re.compile( - r'^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)') +FORBIDDEN_PATTERNS = re.compile(r"^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)") ALLOWED_PATTERNS = [ - re.compile(r'^\s*import\s+regex\s+as\s+re\s*$'), - re.compile(r'^\s*import\s+regex\s*$'), + re.compile(r"^\s*import\s+regex\s+as\s+re\s*$"), + re.compile(r"^\s*import\s+regex\s*$"), ] def get_staged_python_files() -> list[str]: try: result = subprocess.run( - ['git', 'diff', '--cached', '--name-only', '--diff-filter=AM'], - capture_output=True, - text=True, - check=True) - files = result.stdout.strip().split( - '\n') if result.stdout.strip() else [] - return [f for f in files if f.endswith('.py')] + ["git", "diff", "--cached", "--name-only", "--diff-filter=AM"], capture_output=True, text=True, check=True + ) + files = result.stdout.strip().split("\n") if result.stdout.strip() else [] + return [f for f in files if f.endswith(".py")] except subprocess.CalledProcessError: return [] def is_forbidden_import(line: str) -> 
bool: line = line.strip() - return bool( - FORBIDDEN_PATTERNS.match(line) - and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS)) + return bool(FORBIDDEN_PATTERNS.match(line) and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS)) def check_file(filepath: str) -> list[tuple[int, str]]: violations = [] try: - with open(filepath, encoding='utf-8') as f: + with open(filepath, encoding="utf-8") as f: for line_num, line in enumerate(f, 1): if is_forbidden_import(line): violations.append((line_num, line.strip())) @@ -89,9 +83,7 @@ def main() -> int: if total_violations > 0: print(f"\nšŸ’” Found {total_violations} violation(s).") print("āŒ Please replace 'import re' with 'import regex as re'") - print( - " Also replace 'from re import ...' with 'from regex import ...'" - ) # noqa: E501 + print(" Also replace 'from re import ...' with 'from regex import ...'") # noqa: E501 print("āœ… Allowed imports:") print(" - import regex as re") print(" - import regex") # noqa: E501 diff --git a/tools/format_contributors.py b/tools/format_contributors.py index 84dc4d19..93d5d9e1 100644 --- a/tools/format_contributors.py +++ b/tools/format_contributors.py @@ -20,9 +20,7 @@ import re import sys from datetime import datetime -p = re.compile( - r'@(?P[A-Za-z0-9-_]+)[^\`]*\`(?P[0-9a-fA-F]+)\`\s*[-–—]\s*(?P.+)$' -) +p = re.compile(r"@(?P[A-Za-z0-9-_]+)[^\`]*\`(?P[0-9a-fA-F]+)\`\s*[-–—]\s*(?P.+)$") def parse_lines(lines): @@ -34,9 +32,9 @@ def parse_lines(lines): m = p.search(ln) if not m: continue - user = m.group('user') - sha = m.group('sha') - datestr = m.group('date').strip() + user = m.group("user") + sha = m.group("sha") + datestr = m.group("date").strip() try: dt = datetime.fromisoformat(datestr) except Exception: @@ -51,27 +49,17 @@ def parse_lines(lines): def main(): ap = argparse.ArgumentParser( - description= - "Format and sort contributor lines by date (newest first). Outputs markdown table by default." 
+ description="Format and sort contributor lines by date (newest first). Outputs markdown table by default." ) ap.add_argument( - 'file', - nargs='?', - help= - 'input file (default stdin), output from collect_user_first_contribution.sh' + "file", nargs="?", help="input file (default stdin), output from collect_user_first_contribution.sh" ) - ap.add_argument( - '--start', - type=int, - default=1, - help='minimum number for table (oldest row will have this number)') - ap.add_argument('--repo', - default='vllm-project/vllm-ascend', - help='repo used for commit links') + ap.add_argument("--start", type=int, default=1, help="minimum number for table (oldest row will have this number)") + ap.add_argument("--repo", default="vllm-project/vllm-ascend", help="repo used for commit links") args = ap.parse_args() if args.file: - with open(args.file, 'r', encoding='utf-8') as f: + with open(args.file, encoding="utf-8") as f: lines = f.readlines() else: lines = sys.stdin.readlines() @@ -88,9 +76,9 @@ def main(): for dt, user, sha, datestr in items: short = sha[:7] date_short = dt.strftime("%Y/%m/%d") - print( - f"| {n} | [@{user}](https://github.com/{user}) | {date_short} | [{short}](https://github.com/{args.repo}/commit/{sha}) |" - ) + user_url = f"https://github.com/{user}" + commit_url = f"https://github.com/{args.repo}/commit/{sha}" + print(f"| {n} | [@{user}]({user_url}) | {date_short} | [{short}]({commit_url}) |") n -= 1 diff --git a/tools/send_mm_request.py b/tools/send_mm_request.py index 61551200..8b3033ab 100644 --- a/tools/send_mm_request.py +++ b/tools/send_mm_request.py @@ -4,39 +4,30 @@ import os import requests from modelscope import snapshot_download # type: ignore -mm_dir = snapshot_download("vllm-ascend/mm_request", repo_type='dataset') +mm_dir = snapshot_download("vllm-ascend/mm_request", repo_type="dataset") image_path = os.path.join(mm_dir, "test_mm2.jpg") -with open(image_path, 'rb') as image_file: - image_data = 
base64.b64encode(image_file.read()).decode('utf-8') +with open(image_path, "rb") as image_file: + image_data = base64.b64encode(image_file.read()).decode("utf-8") data = { - "messages": [{ - "role": - "user", - "content": [{ - "type": "text", - "text": "What is the content of this image?" - }, { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{image_data}" - } - }] - }], + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is the content of this image?"}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}, + ], + } + ], "eos_token_id": [1, 106], - "pad_token_id": - 0, - "top_k": - 64, - "top_p": - 0.95, - "max_tokens": - 8192, - "stream": - False + "pad_token_id": 0, + "top_k": 64, + "top_p": 0.95, + "max_tokens": 8192, + "stream": False, } -headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} +headers = {"Accept": "application/json", "Content-Type": "application/json"} def send_image_request(model, server): diff --git a/tools/send_request.py b/tools/send_request.py index faad3156..3edc12f8 100644 --- a/tools/send_request.py +++ b/tools/send_request.py @@ -20,10 +20,12 @@ def send_v1_completions(prompt, model, server, request_args=None): def send_v1_chat_completions(prompt, model, server, request_args=None): data: dict[str, Any] = { "model": model, - "messages": [{ - "role": "user", - "content": prompt, - }], + "messages": [ + { + "role": "user", + "content": prompt, + } + ], } if request_args: data.update(request_args) diff --git a/tools/vllm_bench.py b/tools/vllm_bench.py index b460e191..eab096a6 100644 --- a/tools/vllm_bench.py +++ b/tools/vllm_bench.py @@ -24,42 +24,58 @@ from .aisbench import maybe_download_from_modelscope class VllmbenchRunner: - def _run_vllm_bench_task(self): vllm_bench_cmd = [ - 'vllm', 'bench', 'serve', '--backend', 'openai-chat', - '--trust-remote-code', '--served-model-name', - str(self.model_name), '--model', 
self.model_path, '--tokenizer', - self.model_path, '--metric-percentiles', '50,90,99', '--host', - self.host_ip, '--port', - str(self.port), '--save-result', '--result-filename', - self.result_filename, '--endpoint', '/v1/chat/completions', - '--ready-check-timeout-sec', '0' + "vllm", + "bench", + "serve", + "--backend", + "openai-chat", + "--trust-remote-code", + "--served-model-name", + str(self.model_name), + "--model", + self.model_path, + "--tokenizer", + self.model_path, + "--metric-percentiles", + "50,90,99", + "--host", + self.host_ip, + "--port", + str(self.port), + "--save-result", + "--result-filename", + self.result_filename, + "--endpoint", + "/v1/chat/completions", + "--ready-check-timeout-sec", + "0", ] self._concat_config_args(vllm_bench_cmd) print(f"running vllm_bench cmd: {' '.join(vllm_bench_cmd)}") - self.proc: subprocess.Popen = subprocess.Popen(vllm_bench_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + self.proc: subprocess.Popen = subprocess.Popen( + vllm_bench_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) - def __init__(self, - model_name: str, - port: int, - config: dict, - baseline: float, - threshold: float = 0.97, - model_path: str = "", - host_ip: str = "localhost"): + def __init__( + self, + model_name: str, + port: int, + config: dict, + baseline: float, + threshold: float = 0.97, + model_path: str = "", + host_ip: str = "localhost", + ): self.model_name = model_name self.model_path = model_path if not self.model_path: self.model_path = maybe_download_from_modelscope(model_name) - assert self.model_path is not None, \ - f"Failed to download model: model={self.model_path}" + assert self.model_path is not None, f"Failed to download model: model={self.model_path}" self.port = port self.host_ip = host_ip - curr_time = datetime.now().strftime('%Y%m%d%H%M%S') + curr_time = datetime.now().strftime("%Y%m%d%H%M%S") self.result_filename = f"result_vllm_bench_{curr_time}.json" self.config = config 
self.baseline = baseline @@ -96,19 +112,14 @@ class VllmbenchRunner: stdout, stderr = self.proc.communicate() if self.proc.returncode != 0: - logging.error( - f"vllm bench command failed, return code: {self.proc.returncode}" - ) + logging.error(f"vllm bench command failed, return code: {self.proc.returncode}") logging.error(f"Standard output: {stdout}") logging.error(f"Standard error: {stderr}") - raise RuntimeError( - f"vllm bench command execution failed: {stderr}") + raise RuntimeError(f"vllm bench command execution failed: {stderr}") - logging.info( - f"vllm bench command completed, return code: {self.proc.returncode}" - ) + logging.info(f"vllm bench command completed, return code: {self.proc.returncode}") if stdout: - lines = stdout.split('\n') + lines = stdout.split("\n") last_lines = lines[-100:] if len(lines) > 100 else lines logging.info(f"Last {len(last_lines)} lines of standard output:") for line in last_lines: @@ -119,36 +130,28 @@ class VllmbenchRunner: def _get_result(self): result_file = os.path.join(os.getcwd(), self.result_filename) print("Getting performance results from file: ", result_file) - with open(result_file, 'r', encoding='utf-8') as f: + with open(result_file, encoding="utf-8") as f: self.result = json.load(f) def _performance_verify(self): self._get_result() output_throughput = self.result["output_throughput"] - assert float( - output_throughput - ) >= self.baseline * self.threshold, f"Performance verification failed. The current Output Token Throughput is {output_throughput} token/s, which is not greater than or equal to {self.threshold} * baseline {self.baseline}." + assert float(output_throughput) >= self.baseline * self.threshold, ( + "Performance verification failed. " + f"The current Output Token Throughput is {output_throughput} token/s, " + f"which is not greater than or equal to {self.threshold} * baseline {self.baseline}." 
+ ) -def run_vllm_bench_case(model_name, - port, - config, - baseline, - threshold=0.97, - model_path="", - host_ip="localhost"): +def run_vllm_bench_case(model_name, port, config, baseline, threshold=0.97, model_path="", host_ip="localhost"): try: - with VllmbenchRunner(model_name, - port, - config, - baseline, - threshold, - model_path=model_path, - host_ip=host_ip) as vllm_bench: + with VllmbenchRunner( + model_name, port, config, baseline, threshold, model_path=model_path, host_ip=host_ip + ) as vllm_bench: vllm_bench_result = vllm_bench.result except Exception as e: print(e) error_msg = f"vllm_bench run failed, reason is {e}" logging.error(error_msg) - assert False, f"vllm_bench run failed, reason is {e}" + raise RuntimeError(error_msg) from e return vllm_bench_result