diff --git a/tools/aisbench.py b/tools/aisbench.py
index 9f37f126..2dc13b4d 100644
--- a/tools/aisbench.py
+++ b/tools/aisbench.py
@@ -28,7 +28,7 @@
 import huggingface_hub
 import pandas as pd
 from modelscope import snapshot_download  # type: ignore
-BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath("."))
+BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath("./benchmark"))
 DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark",
                                 "configs", "datasets")
 REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark",
@@ -74,9 +74,13 @@ class AisbenchRunner:
                  host_ip: str = "localhost",
                  verify=True):
         self.model = model
-        self.dataset_path = maybe_download_from_modelscope(
-            aisbench_config["dataset_path"], repo_type="dataset")
-        self.model_path = maybe_download_from_modelscope(model)
+        self.dataset_path = aisbench_config.get("dataset_path_local")
+        if not self.dataset_path:
+            self.dataset_path = maybe_download_from_modelscope(
+                aisbench_config["dataset_path"], repo_type="dataset")
+        self.model_path = aisbench_config.get("model_path")
+        if not self.model_path:
+            self.model_path = maybe_download_from_modelscope(model)
         assert self.dataset_path is not None and self.model_path is not None, \
             f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}"
         self.port = port
diff --git a/tools/vllm_bench.py b/tools/vllm_bench.py
new file mode 100644
index 00000000..6cfcc2be
--- /dev/null
+++ b/tools/vllm_bench.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+import json
+import logging
+import os
+import subprocess
+from datetime import datetime
+
+from .aisbench import maybe_download_from_modelscope
+
+
+class VllmbenchRunner:
+
+    def _run_vllm_bench_task(self):
+        # Build the `vllm bench serve` command line and launch it as a
+        # subprocess whose output is streamed back through pipes.
+        vllm_bench_cmd = [
+            'vllm', 'bench', 'serve', '--backend', 'openai-chat',
+            '--trust-remote-code', '--served-model-name',
+            str(self.model_name), '--model', self.model_path, '--tokenizer',
+            self.model_path, '--metric-percentiles', '50,90,99', '--host',
+            self.host_ip, '--port',
+            str(self.port), '--save-result', '--result-filename',
+            self.result_filename, '--endpoint', '/v1/chat/completions',
+            '--ready-check-timeout-sec', '0'
+        ]
+        self._concat_config_args(vllm_bench_cmd)
+        print(f"running vllm_bench cmd: {' '.join(vllm_bench_cmd)}")
+        self.proc: subprocess.Popen = subprocess.Popen(vllm_bench_cmd,
+                                                       stdout=subprocess.PIPE,
+                                                       stderr=subprocess.PIPE,
+                                                       text=True)
+
+    def __init__(self,
+                 model_name: str,
+                 port: int,
+                 config: dict,
+                 model_path: str = "",
+                 host_ip: str = "localhost"):
+        self.model_name = model_name
+        # Prefer a caller-supplied local path; otherwise fall back to
+        # downloading the model from ModelScope.
+        self.model_path = model_path
+        if not self.model_path:
+            self.model_path = maybe_download_from_modelscope(model_name)
+        assert self.model_path is not None, \
+            f"Failed to download model: model={model_name}"
+        self.port = port
+        self.host_ip = host_ip
+        curr_time = datetime.now().strftime('%Y%m%d%H%M%S')
+        self.result_filename = f"result_vllm_bench_{curr_time}.json"
+        self.config = config
+
+        self._run_vllm_bench_task()
+        self._wait_for_task()
+        self._get_result()
+
+    def _concat_config_args(self, vllm_bench_cmd):
+        # `ignore_eos` is a boolean switch: pass it as a bare flag when
+        # truthy, drop it entirely when falsy.
+        if "ignore_eos" in self.config:
+            if self.config["ignore_eos"]:
+                self.config["ignore_eos"] = ""
+            else:
+                self.config.pop("ignore_eos")
+        for key, value in self.config.items():
+            key = "--" + key.replace("_", "-")
+            if value == "":
+                vllm_bench_cmd += [key]
+            else:
+                vllm_bench_cmd += [key, str(value)]
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.proc.terminate()
+        try:
+            self.proc.wait(8)
+        except subprocess.TimeoutExpired:
+            # force kill if terminate() did not stop the process in time
+            self.proc.kill()
+
+    def _wait_for_task(self):
+        # Stream stdout until the result banner appears; fail fast on the
+        # first error line or if the process exits without reporting.
+        result_msg = "========================="
+        while True:
+            raw_line = self.proc.stdout.readline()
+            if not raw_line:
+                # EOF: the benchmark exited before printing the banner.
+                raise RuntimeError(
+                    "vllm_bench exited before reporting results: "
+                    f"{self.proc.stderr.read()}")
+            line = raw_line.strip()
+            if line:
+                print(line)
+                if result_msg in line:
+                    return
+                if "ERROR" in line:
+                    error_msg = f"vllm_bench hit a runtime error, the first error is: {line}"
+                    raise RuntimeError(error_msg)
+
+    def _get_result(self):
+        result_file = os.path.join(os.getcwd(), self.result_filename)
+        print("Getting performance results from file: ", result_file)
+        with open(result_file, 'r', encoding='utf-8') as f:
+            self.result = json.load(f)
+
+
+def run_vllm_bench_case(model_name,
+                        port,
+                        config,
+                        model_path="",
+                        host_ip="localhost"):
+    try:
+        with VllmbenchRunner(model_name,
+                             port,
+                             config,
+                             model_path=model_path,
+                             host_ip=host_ip) as vllm_bench:
+            vllm_bench_result = vllm_bench.result
+    except Exception as e:
+        error_msg = f"vllm_bench run failed, reason is {e}"
+        logging.error(error_msg)
+        assert False, error_msg
+    return vllm_bench_result
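
For reference, a minimal sketch of how the new helper might be driven, assuming a vLLM
server is already listening on the target port. The model name, local path, port, and
config values below are illustrative placeholders, not part of this change; the extra
config keys simply pass through _concat_config_args as `vllm bench serve` flags.

    # Hypothetical usage of tools/vllm_bench.py -- all values are placeholders.
    from tools.vllm_bench import run_vllm_bench_case

    result = run_vllm_bench_case(
        "Qwen/Qwen2.5-7B-Instruct",  # placeholder served-model name
        8000,                        # port of the already-running vLLM server
        {
            "num_prompts": 8,        # forwarded as: --num-prompts 8
            "ignore_eos": True,      # forwarded as the bare --ignore-eos flag
        },
        model_path="/data/models/qwen2.5-7b",  # skips the ModelScope download
    )
    # `result` is the JSON written by --save-result (throughput, latency
    # percentiles, and similar metrics).
    print(result)

Note the `ignore_eos` handling: because a truthy value is rewritten to an empty string,
it is emitted as a bare switch rather than `--ignore-eos True`, which the CLI's
store-true flag would not accept.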