init
This commit is contained in:
1
transformers/benchmark/.gitignore
vendored
Normal file
1
transformers/benchmark/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
benchmark_results/
|
||||
49
transformers/benchmark/README.md
Normal file
49
transformers/benchmark/README.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# Benchmarks
|
||||
|
||||
You might want to add new benchmarks.
|
||||
|
||||
You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
|
||||
|
||||
The expected function signature is the following:
|
||||
|
||||
```py
|
||||
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
||||
```
|
||||
|
||||
## Writing metrics to the database
|
||||
|
||||
`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
|
||||
|
||||
cf [`llama.py`](./llama.py) to see an example of this in practice.
|
||||
|
||||
```py
|
||||
from benchmarks_entrypoint import MetricsRecorder
|
||||
import psycopg2
|
||||
|
||||
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
|
||||
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
|
||||
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
|
||||
# To collect device measurements
|
||||
metrics_recorder.collect_device_measurements(
|
||||
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
|
||||
)
|
||||
# To collect your model measurements
|
||||
metrics_recorder.collect_model_measurements(
|
||||
benchmark_id,
|
||||
{
|
||||
"model_load_time": model_load_time,
|
||||
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
|
||||
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
|
||||
"first_eager_generate_time_secs": first_eager_generate_time,
|
||||
"second_eager_generate_time_secs": second_eager_generate_time,
|
||||
"time_to_first_token_secs": time_to_first_token,
|
||||
"time_to_second_token_secs": time_to_second_token,
|
||||
"time_to_third_token_secs": time_to_third_token,
|
||||
"time_to_next_token_mean_secs": mean_time_to_next_token,
|
||||
"first_compile_generate_time_secs": first_compile_generate_time,
|
||||
"second_compile_generate_time_secs": second_compile_generate_time,
|
||||
"third_compile_generate_time_secs": third_compile_generate_time,
|
||||
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
|
||||
},
|
||||
)
|
||||
```
|
||||
0
transformers/benchmark/__init__.py
Normal file
0
transformers/benchmark/__init__.py
Normal file
354
transformers/benchmark/benches/llama.py
Normal file
354
transformers/benchmark/benches/llama.py
Normal file
@@ -0,0 +1,354 @@
|
||||
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import sys
|
||||
from logging import Logger
|
||||
from threading import Event, Thread
|
||||
from time import perf_counter, sleep
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Add the parent directory to Python path to import benchmarks_entrypoint
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import gpustat
|
||||
import psutil
|
||||
import psycopg2
|
||||
from benchmarks_entrypoint import MetricsRecorder
|
||||
|
||||
|
||||
# Optional heavy ML dependencies - only required when actually running the benchmark
|
||||
try:
|
||||
import torch
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
|
||||
|
||||
TRANSFORMERS_AVAILABLE = True
|
||||
except ImportError:
|
||||
TRANSFORMERS_AVAILABLE = False
|
||||
torch = None
|
||||
AutoModelForCausalLM = None
|
||||
AutoTokenizer = None
|
||||
GenerationConfig = None
|
||||
StaticCache = None
|
||||
|
||||
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "1"
|
||||
|
||||
# Only set torch precision if torch is available
|
||||
if TRANSFORMERS_AVAILABLE:
|
||||
torch.set_float32_matmul_precision("high")
|
||||
|
||||
|
||||
def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
|
||||
p = psutil.Process(os.getpid())
|
||||
while not continue_metric_collection.is_set():
|
||||
with p.oneshot():
|
||||
cpu_util = p.cpu_percent()
|
||||
mem_megabytes = p.memory_info().rss / (1024 * 1024)
|
||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||
gpu_util = gpu_stats[0]["utilization.gpu"]
|
||||
gpu_mem_megabytes = gpu_stats[0]["memory.used"]
|
||||
metrics_recorder.collect_device_measurements(
|
||||
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
|
||||
)
|
||||
sleep(0.01)
|
||||
|
||||
|
||||
def run_benchmark(
|
||||
logger: Logger,
|
||||
repository: str,
|
||||
branch: str,
|
||||
commit_id: str,
|
||||
commit_msg: str,
|
||||
metrics_recorder=None,
|
||||
num_tokens_to_generate=100,
|
||||
):
|
||||
# Check if required ML dependencies are available
|
||||
if not TRANSFORMERS_AVAILABLE:
|
||||
logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
|
||||
logger.error("pip install torch transformers")
|
||||
logger.error("Skipping LLaMA benchmark due to missing dependencies.")
|
||||
return
|
||||
|
||||
continue_metric_collection = Event()
|
||||
metrics_thread = None
|
||||
model_id = "meta-llama/Llama-2-7b-hf"
|
||||
|
||||
# If no metrics_recorder is provided, create one for backward compatibility
|
||||
if metrics_recorder is None:
|
||||
try:
|
||||
metrics_recorder = MetricsRecorder(
|
||||
psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
|
||||
)
|
||||
should_close_recorder = True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create metrics recorder: {e}")
|
||||
return
|
||||
else:
|
||||
should_close_recorder = False
|
||||
try:
|
||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||
gpu_name = gpu_stats[0]["name"]
|
||||
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
|
||||
logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
|
||||
metrics_thread = Thread(
|
||||
target=collect_metrics,
|
||||
args=[benchmark_id, continue_metric_collection, metrics_recorder],
|
||||
)
|
||||
metrics_thread.start()
|
||||
logger.info("started background thread to fetch device metrics")
|
||||
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
|
||||
|
||||
device = "cuda"
|
||||
|
||||
logger.info("downloading weights")
|
||||
# This is to avoid counting download in model load time measurement
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
|
||||
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
|
||||
logger.info("loading model")
|
||||
start = perf_counter()
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id, dtype=torch.float16, generation_config=gen_config
|
||||
).eval()
|
||||
model.to(device)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
model_load_time = end - start
|
||||
logger.info(f"loaded model in: {model_load_time}s")
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
prompt = "Why dogs are so cute?"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(device)
|
||||
|
||||
# Specify the max length (including both the prompt and the response)
|
||||
# When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
|
||||
# with sequence length = `max_length`. The longer the more you will re-use it
|
||||
seq_length = inputs["input_ids"].shape[1]
|
||||
model.generation_config.max_length = seq_length + num_tokens_to_generate
|
||||
batch_size = inputs["input_ids"].shape[0]
|
||||
|
||||
# Copied from the gpt-fast repo
|
||||
def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
|
||||
q = torch.empty_like(probs_sort).exponential_(1)
|
||||
return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
|
||||
|
||||
def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
|
||||
logits = logits / max(temperature, 1e-5)
|
||||
|
||||
if top_k is not None:
|
||||
v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
|
||||
pivot = v.select(-1, -1).unsqueeze(-1)
|
||||
logits = torch.where(logits < pivot, -float("Inf"), logits)
|
||||
probs = torch.nn.functional.softmax(logits, dim=-1)
|
||||
return probs
|
||||
|
||||
def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
|
||||
probs = logits_to_probs(logits[0, -1], temperature, top_k)
|
||||
idx_next = multinomial_sample_one_no_sync(probs)
|
||||
return idx_next, probs
|
||||
|
||||
# First eager forward pass
|
||||
logger.info("running first eager forward pass")
|
||||
start = perf_counter()
|
||||
_ = model(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
first_eager_fwd_pass_time = end - start
|
||||
logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s")
|
||||
|
||||
# Second eager forward pass (should be faster)
|
||||
logger.info("running second eager forward pass")
|
||||
start = perf_counter()
|
||||
_ = model(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
second_eager_fwd_pass_time = end - start
|
||||
logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s")
|
||||
|
||||
# First eager generation
|
||||
logger.info("running first eager generation")
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
first_eager_generate_time = end - start
|
||||
logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
# Second eager generation (should be faster)
|
||||
logger.info("running second eager generation")
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
second_eager_generate_time = end - start
|
||||
logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
logger.info("running generation timing loop")
|
||||
|
||||
input_pos = torch.arange(0, seq_length, device=device)
|
||||
inputs = inputs["input_ids"]
|
||||
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(inputs, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_first_token = end - start
|
||||
|
||||
input_pos = torch.tensor([seq_length], device=device, dtype=torch.int)
|
||||
next_token = next_token.clone()
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(next_token, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_second_token = end - start
|
||||
|
||||
input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int)
|
||||
next_token = next_token.clone()
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(next_token, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_third_token = end - start
|
||||
|
||||
logger.info("running longer generation timing loop")
|
||||
|
||||
total_time = 0
|
||||
for i in range(20):
|
||||
input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int)
|
||||
next_token = next_token.clone()
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(next_token, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
total_time += end - start
|
||||
|
||||
mean_time_to_next_token = total_time / 20
|
||||
|
||||
logger.info("running compilation benchmarks")
|
||||
|
||||
# Now compile the model
|
||||
model = torch.compile(model, mode="max-autotune", fullgraph=True)
|
||||
|
||||
# StaticCache for generation
|
||||
with torch.device(device):
|
||||
model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate)
|
||||
|
||||
input_pos = torch.arange(0, seq_length, device=device)
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"]
|
||||
|
||||
logger.info("compiling model")
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, generation_config=gen_config)
|
||||
model.to(device)
|
||||
model = torch.compile(model, mode="max-autotune", fullgraph=True)
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 1st call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
first_compile_generate_time = end - start
|
||||
logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 2nd call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
second_compile_generate_time = end - start
|
||||
logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 3rd call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
third_compile_generate_time = end - start
|
||||
logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 4th call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
fourth_compile_generate_time = end - start
|
||||
logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
metrics_recorder.collect_model_measurements(
|
||||
benchmark_id,
|
||||
{
|
||||
"model_load_time": model_load_time,
|
||||
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
|
||||
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
|
||||
"first_eager_generate_time_secs": first_eager_generate_time,
|
||||
"second_eager_generate_time_secs": second_eager_generate_time,
|
||||
"time_to_first_token_secs": time_to_first_token,
|
||||
"time_to_second_token_secs": time_to_second_token,
|
||||
"time_to_third_token_secs": time_to_third_token,
|
||||
"time_to_next_token_mean_secs": mean_time_to_next_token,
|
||||
"first_compile_generate_time_secs": first_compile_generate_time,
|
||||
"second_compile_generate_time_secs": second_compile_generate_time,
|
||||
"third_compile_generate_time_secs": third_compile_generate_time,
|
||||
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Caught exception: {e}")
|
||||
continue_metric_collection.set()
|
||||
if metrics_thread is not None:
|
||||
metrics_thread.join()
|
||||
|
||||
# Only close the recorder if we created it locally
|
||||
if should_close_recorder:
|
||||
metrics_recorder.close()
|
||||
324
transformers/benchmark/benchmark.py
Normal file
324
transformers/benchmark/benchmark.py
Normal file
@@ -0,0 +1,324 @@
|
||||
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Run benchmark using the `optimum-benchmark` library with some customization in `transformers`.
|
||||
|
||||
Assume we are under `transformers` root directory: (make sure the commits are valid commits)
|
||||
```bash
|
||||
python benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=9b9c7f03da625b13643e99205c691fe046461724 --metrics=decode.latency.mean,per_token.latency.mean,per_token.throughput.value backend.model=google/gemma-2b benchmark.input_shapes.sequence_length=5,7 benchmark.input_shapes.batch_size=1,2 --multirun
|
||||
```
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
import tempfile
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from git import Repo
|
||||
from huggingface_hub import HfApi
|
||||
from optimum_benchmark import Benchmark
|
||||
from optimum_benchmark_wrapper import main
|
||||
|
||||
|
||||
PATH_TO_REPO = Path(__file__).parent.parent.resolve()
|
||||
|
||||
|
||||
@contextmanager
|
||||
def checkout_commit(repo: Repo, commit_id: str):
|
||||
"""
|
||||
Context manager that checks out a given commit when entered, but gets back to the reference it was at on exit.
|
||||
Args:
|
||||
repo (`git.Repo`): A git repository (for instance the Transformers repo).
|
||||
commit_id (`str`): The commit reference to checkout inside the context manager.
|
||||
"""
|
||||
current_head = repo.head.commit if repo.head.is_detached else repo.head.ref
|
||||
|
||||
try:
|
||||
repo.git.checkout(commit_id)
|
||||
yield
|
||||
|
||||
finally:
|
||||
repo.git.checkout(current_head)
|
||||
|
||||
|
||||
def summarize(run_dir, metrics, expand_metrics=False):
|
||||
"""Produce a summary for each optimum-benchmark launched job's output directory found in `run_dir`.
|
||||
|
||||
Each summary's format is as follows (for `expand_metrics=False`):
|
||||
```
|
||||
{
|
||||
"model": "google/gemma-2b",
|
||||
"commit": "3cd6ed22e4d49219f300f5055e71e3929aba20d7",
|
||||
"config": "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5",
|
||||
"metrics": {
|
||||
"decode.latency.mean": 1.624666809082031,
|
||||
"per_token.latency.mean": 0.012843788806628804,
|
||||
"per_token.throughput.value": 77.85864553330948
|
||||
}
|
||||
}
|
||||
```
|
||||
"""
|
||||
reports = glob.glob(os.path.join(run_dir, "**/benchmark_report.json"), recursive=True)
|
||||
report_dirs = [str(Path(report).parent) for report in reports]
|
||||
|
||||
summaries = []
|
||||
for report_dir in report_dirs:
|
||||
commit = re.search(r"/commit=([^/]+)", report_dir).groups()[0]
|
||||
|
||||
if not os.path.isfile(os.path.join(report_dir, "benchmark.json")):
|
||||
continue
|
||||
benchmark = Benchmark.from_json(os.path.join(report_dir, "benchmark.json"))
|
||||
report = benchmark.report
|
||||
|
||||
model = benchmark.config.backend["model"]
|
||||
|
||||
# This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
|
||||
# (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
|
||||
benchmark_name = re.sub(f"backend.model={model},*", "", report_dir)
|
||||
benchmark_name = str(Path(benchmark_name).parts[-1])
|
||||
if benchmark_name.startswith("commit="):
|
||||
benchmark_name = benchmark.config.name
|
||||
|
||||
metrics_values = {}
|
||||
# post-processing of report: show a few selected/important metric
|
||||
for metric in metrics:
|
||||
keys = metric.split(".")
|
||||
value = report.to_dict()
|
||||
current = metrics_values
|
||||
for key in keys:
|
||||
# Avoid KeyError when a user's specified metric has typo.
|
||||
# TODO: Give warnings.
|
||||
if key not in value:
|
||||
continue
|
||||
value = value[key]
|
||||
|
||||
if expand_metrics:
|
||||
if isinstance(value, dict):
|
||||
if key not in current:
|
||||
current[key] = {}
|
||||
current = current[key]
|
||||
else:
|
||||
current[key] = value
|
||||
|
||||
if not expand_metrics:
|
||||
metrics_values[metric] = value
|
||||
|
||||
# show some config information
|
||||
print(f"model: {model}")
|
||||
print(f"commit: {commit}")
|
||||
print(f"config: {benchmark_name}")
|
||||
if len(metrics_values) > 0:
|
||||
print("metrics:")
|
||||
if expand_metrics:
|
||||
print(metrics_values)
|
||||
else:
|
||||
for metric, value in metrics_values.items():
|
||||
print(f" - {metric}: {value}")
|
||||
print("-" * 80)
|
||||
|
||||
summary = {
|
||||
"model": model,
|
||||
"commit": commit,
|
||||
"config": benchmark_name,
|
||||
"metrics": metrics_values,
|
||||
}
|
||||
summaries.append(summary)
|
||||
|
||||
with open(os.path.join(report_dir, "summary.json"), "w") as fp:
|
||||
json.dump(summary, fp, indent=4)
|
||||
|
||||
return summaries
|
||||
|
||||
|
||||
def combine_summaries(summaries):
|
||||
"""Combine a list of summary obtained from the function `summarize`.
|
||||
|
||||
The combined summary's format is as follows:
|
||||
```
|
||||
"google/gemma-2b": {
|
||||
"benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5": {
|
||||
"3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
|
||||
"metrics": {"decode.latency.mean": 1.624666809082031}
|
||||
},
|
||||
"c97ee28b117c0abe8e08891f402065e4df6d72aa": {
|
||||
"metrics": {"decode.latency.mean": 1.6278163452148438}
|
||||
}
|
||||
},
|
||||
"benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=5": {
|
||||
"3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
|
||||
"metrics": {"decode.latency.mean": 1.6947791748046876}
|
||||
},
|
||||
"c97ee28b117c0abe8e08891f402065e4df6d72aa": {
|
||||
"metrics": {
|
||||
"decode.latency.mean": 1.6980519409179688}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
"""
|
||||
combined = {}
|
||||
for summary in summaries:
|
||||
model = summary["model"]
|
||||
config = summary["config"]
|
||||
commit = summary["commit"]
|
||||
|
||||
if model not in combined:
|
||||
combined[model] = {}
|
||||
|
||||
if config not in combined[model]:
|
||||
combined[model][config] = {}
|
||||
|
||||
if commit not in combined[model][config]:
|
||||
combined[model][config][commit] = {"metrics": summary["metrics"]}
|
||||
|
||||
with open(os.path.join(exp_run_dir, "summary.json"), "w") as fp:
|
||||
json.dump(combined, fp, indent=4)
|
||||
|
||||
print(json.dumps(combined, indent=4))
|
||||
|
||||
return combined
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
def list_str(values):
|
||||
return values.split(",")
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
|
||||
parser.add_argument("--config-name", type=str, required=True, help="The config name.")
|
||||
|
||||
# arguments specific to this wrapper for our own customization
|
||||
parser.add_argument("--ensure_empty", type=bool, default=True, help="If to create a temporary directory.")
|
||||
parser.add_argument(
|
||||
"--commit",
|
||||
type=list_str,
|
||||
default="",
|
||||
help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.",
|
||||
)
|
||||
parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.")
|
||||
|
||||
parser.add_argument("--repo_id", type=str, default=None, help="The repository to which the file will be uploaded.")
|
||||
parser.add_argument("--path_in_repo", type=str, default=None, help="Relative filepath in the repo.")
|
||||
parser.add_argument("--token", type=str, default=None, help="A valid user access token (string).")
|
||||
|
||||
args, optimum_benchmark_args = parser.parse_known_args()
|
||||
|
||||
repo = Repo(PATH_TO_REPO)
|
||||
|
||||
metrics = [
|
||||
"prefill.latency.mean",
|
||||
"prefill.throughput.value",
|
||||
"decode.latency.mean",
|
||||
"decode.throughput.value",
|
||||
"per_token.latency.mean",
|
||||
"per_token.throughput.value",
|
||||
]
|
||||
if args.metrics is not None:
|
||||
metrics = args.metrics.split(",")
|
||||
|
||||
# Get `backend.model` in a hacky way: We want to control the experiment flow manually.
|
||||
models = [""]
|
||||
for idx, arg in enumerate(optimum_benchmark_args):
|
||||
if arg.startswith("backend.model="):
|
||||
models = arg[len("backend.model=") :]
|
||||
models = models.split(",")
|
||||
break
|
||||
optimum_benchmark_args = [arg for arg in optimum_benchmark_args if not arg.startswith("backend.model=")]
|
||||
|
||||
# Get the commit(s)
|
||||
current_head = str(repo.head.commit) if repo.head.is_detached else str(repo.head.ref)
|
||||
commits = [x for x in args.commit if x != ""]
|
||||
if len(commits) == 0:
|
||||
commits = [current_head]
|
||||
elif len(commits) == 1 and commits[0] == "diff":
|
||||
# compare to `main`
|
||||
commits = ["main", current_head]
|
||||
|
||||
# Get the specified run directory
|
||||
run_dir_arg_idx, run_dir = -1, None
|
||||
sweep_dir_arg_idx, sweep_dir = -1, None
|
||||
for idx, arg in enumerate(optimum_benchmark_args):
|
||||
if arg.startswith("hydra.run.dir="):
|
||||
run_dir = arg[len("hydra.run.dir=") :]
|
||||
run_dir_arg_idx = idx
|
||||
elif arg.startswith("hydra.sweep.dir="):
|
||||
sweep_dir = arg[len("hydra.sweep.dir=") :]
|
||||
sweep_dir_arg_idx = idx
|
||||
exp_run_dir, arg_dix, arg_name = (
|
||||
(sweep_dir, sweep_dir_arg_idx, "hydra.sweep.dir")
|
||||
if "--multirun" in optimum_benchmark_args
|
||||
else (run_dir, run_dir_arg_idx, "hydra.run.dir")
|
||||
)
|
||||
|
||||
# TODO: not hardcoded
|
||||
if exp_run_dir is None and args.ensure_empty:
|
||||
exp_run_dir = "_benchmark"
|
||||
|
||||
if args.ensure_empty:
|
||||
os.makedirs(exp_run_dir, exist_ok=True)
|
||||
exp_run_dir = tempfile.mkdtemp(dir=exp_run_dir)
|
||||
|
||||
run_summaries = []
|
||||
for commit in commits:
|
||||
with checkout_commit(repo, commit):
|
||||
commit = str(repo.head.commit)
|
||||
|
||||
commit_run_dir = exp_run_dir
|
||||
if exp_run_dir is not None:
|
||||
commit_run_dir = os.path.join(exp_run_dir, rf"commit\={commit}")
|
||||
|
||||
print(f"Run benchmark on commit: {commit}")
|
||||
|
||||
for model in models:
|
||||
model_arg = [f"backend.model={model}"] if model != "" else []
|
||||
dir_args = []
|
||||
if commit_run_dir is not None:
|
||||
if arg_dix > -1:
|
||||
optimum_benchmark_args[arg_dix] = f"{arg_name}={commit_run_dir}"
|
||||
else:
|
||||
dir_args = [
|
||||
f"hydra.sweep.dir={commit_run_dir}",
|
||||
f"hydra.run.dir={commit_run_dir}/" + "${hydra.job.override_dirname}",
|
||||
]
|
||||
main(args.config_dir, args.config_name, model_arg + dir_args + optimum_benchmark_args)
|
||||
|
||||
if commit_run_dir is not None:
|
||||
# Need to remove the `\` character
|
||||
summaries = summarize(commit_run_dir.replace("\\", ""), metrics)
|
||||
run_summaries.extend(summaries)
|
||||
|
||||
# aggregate the information across the commits
|
||||
if exp_run_dir is not None:
|
||||
with open(os.path.join(exp_run_dir, "summaries.json"), "w") as fp:
|
||||
json.dump(run_summaries, fp, indent=4)
|
||||
|
||||
combined_summary = combine_summaries(run_summaries)
|
||||
|
||||
if args.repo_id is not None and args.path_in_repo is not None:
|
||||
# Upload to Hub
|
||||
api = HfApi()
|
||||
api.upload_folder(
|
||||
folder_path=exp_run_dir,
|
||||
path_in_repo=args.path_in_repo,
|
||||
repo_id=args.repo_id,
|
||||
repo_type="dataset",
|
||||
token=args.token,
|
||||
)
|
||||
502
transformers/benchmark/benchmarks_entrypoint.py
Normal file
502
transformers/benchmark/benchmarks_entrypoint.py
Normal file
@@ -0,0 +1,502 @@
|
||||
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import argparse
|
||||
import importlib.util
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
try:
|
||||
from psycopg2.extensions import register_adapter
|
||||
from psycopg2.extras import Json
|
||||
|
||||
register_adapter(dict, Json)
|
||||
PSYCOPG2_AVAILABLE = True
|
||||
except ImportError:
|
||||
PSYCOPG2_AVAILABLE = False
|
||||
|
||||
|
||||
class ImportModuleException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class MetricsRecorder:
|
||||
def __init__(
|
||||
self,
|
||||
connection,
|
||||
logger: logging.Logger,
|
||||
repository: str,
|
||||
branch: str,
|
||||
commit_id: str,
|
||||
commit_msg: str,
|
||||
collect_csv_data: bool = True,
|
||||
):
|
||||
self.conn = connection
|
||||
self.use_database = connection is not None
|
||||
if self.use_database:
|
||||
self.conn.autocommit = True
|
||||
self.logger = logger
|
||||
self.repository = repository
|
||||
self.branch = branch
|
||||
self.commit_id = commit_id
|
||||
self.commit_msg = commit_msg
|
||||
self.collect_csv_data = collect_csv_data
|
||||
|
||||
# For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
|
||||
if self.collect_csv_data:
|
||||
# Initialize empty DataFrames with proper schemas
|
||||
self.benchmarks_df = pd.DataFrame(
|
||||
columns=[
|
||||
"benchmark_id",
|
||||
"repository",
|
||||
"branch",
|
||||
"commit_id",
|
||||
"commit_message",
|
||||
"metadata",
|
||||
"created_at",
|
||||
]
|
||||
)
|
||||
self.device_measurements_df = pd.DataFrame(
|
||||
columns=["benchmark_id", "cpu_util", "mem_megabytes", "gpu_util", "gpu_mem_megabytes", "time"]
|
||||
)
|
||||
self.model_measurements_df = pd.DataFrame(
|
||||
columns=[
|
||||
"benchmark_id",
|
||||
"time",
|
||||
"model_load_time",
|
||||
"first_eager_forward_pass_time_secs",
|
||||
"second_eager_forward_pass_time_secs",
|
||||
"first_eager_generate_time_secs",
|
||||
"second_eager_generate_time_secs",
|
||||
"time_to_first_token_secs",
|
||||
"time_to_second_token_secs",
|
||||
"time_to_third_token_secs",
|
||||
"time_to_next_token_mean_secs",
|
||||
"first_compile_generate_time_secs",
|
||||
"second_compile_generate_time_secs",
|
||||
"third_compile_generate_time_secs",
|
||||
"fourth_compile_generate_time_secs",
|
||||
]
|
||||
)
|
||||
else:
|
||||
self.benchmarks_df = None
|
||||
self.device_measurements_df = None
|
||||
self.model_measurements_df = None
|
||||
|
||||
def initialise_benchmark(self, metadata: dict[str, str]) -> str:
|
||||
"""
|
||||
Creates a new benchmark, returns the benchmark id (UUID)
|
||||
"""
|
||||
# Generate a unique UUID for this benchmark
|
||||
benchmark_id = str(uuid.uuid4())
|
||||
|
||||
if self.use_database:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)",
|
||||
(benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
|
||||
)
|
||||
self.logger.debug(f"initialised benchmark #{benchmark_id}")
|
||||
|
||||
# Store benchmark data for CSV export (if enabled)
|
||||
if self.collect_csv_data:
|
||||
# Add row to pandas DataFrame
|
||||
new_row = pd.DataFrame(
|
||||
[
|
||||
{
|
||||
"benchmark_id": benchmark_id,
|
||||
"repository": self.repository,
|
||||
"branch": self.branch,
|
||||
"commit_id": self.commit_id,
|
||||
"commit_message": self.commit_msg,
|
||||
"metadata": json.dumps(metadata),
|
||||
"created_at": datetime.utcnow().isoformat(),
|
||||
}
|
||||
]
|
||||
)
|
||||
self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
|
||||
|
||||
mode_info = []
|
||||
if self.use_database:
|
||||
mode_info.append("database")
|
||||
if self.collect_csv_data:
|
||||
mode_info.append("CSV")
|
||||
mode_str = " + ".join(mode_info) if mode_info else "no storage"
|
||||
|
||||
self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
|
||||
return benchmark_id
|
||||
|
||||
def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
|
||||
"""
|
||||
Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
|
||||
"""
|
||||
# Store device measurements for CSV export (if enabled)
|
||||
if self.collect_csv_data:
|
||||
# Add row to pandas DataFrame
|
||||
new_row = pd.DataFrame(
|
||||
[
|
||||
{
|
||||
"benchmark_id": benchmark_id,
|
||||
"cpu_util": cpu_util,
|
||||
"mem_megabytes": mem_megabytes,
|
||||
"gpu_util": gpu_util,
|
||||
"gpu_mem_megabytes": gpu_mem_megabytes,
|
||||
"time": datetime.utcnow().isoformat(),
|
||||
}
|
||||
]
|
||||
)
|
||||
self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
|
||||
|
||||
# Store in database if available
|
||||
if self.use_database:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
|
||||
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
|
||||
)
|
||||
|
||||
self.logger.debug(
|
||||
f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
|
||||
)
|
||||
|
||||
def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]):
|
||||
# Store model measurements for CSV export (if enabled)
|
||||
if self.collect_csv_data:
|
||||
# Add row to pandas DataFrame with flattened measurements
|
||||
row_data = {"benchmark_id": benchmark_id, "time": datetime.utcnow().isoformat()}
|
||||
# Flatten the measurements dict into the row
|
||||
row_data.update(measurements)
|
||||
|
||||
new_row = pd.DataFrame([row_data])
|
||||
self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
|
||||
|
||||
# Store in database if available
|
||||
if self.use_database:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO model_measurements (
|
||||
benchmark_id,
|
||||
measurements
|
||||
) VALUES (%s, %s)
|
||||
""",
|
||||
(
|
||||
benchmark_id,
|
||||
measurements,
|
||||
),
|
||||
)
|
||||
|
||||
self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
|
||||
|
||||
def export_to_csv(self, output_dir: str = "benchmark_results"):
|
||||
"""
|
||||
Export all collected data to CSV files using pandas DataFrames
|
||||
"""
|
||||
if not self.collect_csv_data:
|
||||
self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
|
||||
return
|
||||
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
self.logger.info(f"Created output directory: {output_dir}")
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
files_created = []
|
||||
|
||||
# Export using pandas DataFrames
|
||||
self._export_pandas_data(output_dir, timestamp, files_created)
|
||||
|
||||
self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
|
||||
|
||||
def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
|
||||
"""
|
||||
Export CSV files using pandas DataFrames
|
||||
"""
|
||||
# Export benchmarks
|
||||
benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv")
|
||||
self.benchmarks_df.to_csv(benchmarks_file, index=False)
|
||||
files_created.append(benchmarks_file)
|
||||
self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
|
||||
|
||||
# Export device measurements
|
||||
device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
|
||||
self.device_measurements_df.to_csv(device_file, index=False)
|
||||
files_created.append(device_file)
|
||||
self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
|
||||
|
||||
# Export model measurements (already flattened)
|
||||
model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
|
||||
self.model_measurements_df.to_csv(model_file, index=False)
|
||||
files_created.append(model_file)
|
||||
self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
|
||||
|
||||
# Create comprehensive summary using pandas operations
|
||||
summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
|
||||
self._create_summary(summary_file)
|
||||
files_created.append(summary_file)
|
||||
|
||||
def _create_summary(self, summary_file: str):
|
||||
"""
|
||||
Create a comprehensive summary CSV using pandas operations
|
||||
"""
|
||||
if len(self.benchmarks_df) == 0:
|
||||
# Create empty summary file
|
||||
summary_df = pd.DataFrame()
|
||||
summary_df.to_csv(summary_file, index=False)
|
||||
self.logger.info(f"Created empty benchmark summary at {summary_file}")
|
||||
return
|
||||
|
||||
# Start with benchmarks as the base
|
||||
summary_df = self.benchmarks_df.copy()
|
||||
|
||||
# Add model measurements (join on benchmark_id)
|
||||
if len(self.model_measurements_df) > 0:
|
||||
# Drop 'time' column from model measurements to avoid conflicts
|
||||
model_df = self.model_measurements_df.drop(columns=["time"], errors="ignore")
|
||||
summary_df = summary_df.merge(model_df, on="benchmark_id", how="left")
|
||||
|
||||
# Calculate device measurement aggregates using pandas groupby
|
||||
if len(self.device_measurements_df) > 0:
|
||||
device_agg = (
|
||||
self.device_measurements_df.groupby("benchmark_id")
|
||||
.agg(
|
||||
{
|
||||
"cpu_util": ["mean", "max", "std", "count"],
|
||||
"mem_megabytes": ["mean", "max", "std"],
|
||||
"gpu_util": ["mean", "max", "std"],
|
||||
"gpu_mem_megabytes": ["mean", "max", "std"],
|
||||
}
|
||||
)
|
||||
.round(3)
|
||||
)
|
||||
|
||||
# Flatten column names
|
||||
device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
|
||||
device_agg = device_agg.reset_index()
|
||||
|
||||
# Rename count column to be more descriptive
|
||||
if "cpu_util_count" in device_agg.columns:
|
||||
device_agg = device_agg.rename(columns={"cpu_util_count": "device_measurement_count"})
|
||||
|
||||
# Merge with summary
|
||||
summary_df = summary_df.merge(device_agg, on="benchmark_id", how="left")
|
||||
|
||||
# Export the comprehensive summary
|
||||
summary_df.to_csv(summary_file, index=False)
|
||||
self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
|
||||
|
||||
def close(self):
|
||||
if self.use_database and self.conn:
|
||||
self.conn.close()
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setLevel(logging.INFO)
|
||||
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
def parse_arguments() -> tuple[str, str, str, str, bool, str]:
|
||||
"""
|
||||
Parse command line arguments for the benchmarking CLI.
|
||||
"""
|
||||
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
|
||||
|
||||
parser.add_argument(
|
||||
"repository",
|
||||
type=str,
|
||||
help="The repository name on which the benchmarking is performed.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"branch",
|
||||
type=str,
|
||||
help="The branch name on which the benchmarking is performed.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"commit_id",
|
||||
type=str,
|
||||
help="The commit hash on which the benchmarking is performed.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"commit_msg",
|
||||
type=str,
|
||||
help="The commit message associated with the commit, truncated to 70 characters.",
|
||||
)
|
||||
|
||||
parser.add_argument("--csv", action="store_true", default=False, help="Enable CSV output files generation.")
|
||||
|
||||
parser.add_argument(
|
||||
"--csv-output-dir",
|
||||
type=str,
|
||||
default="benchmark_results",
|
||||
help="Directory for CSV output files (default: benchmark_results).",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# CSV is disabled by default, only enabled when --csv is used
|
||||
generate_csv = args.csv
|
||||
|
||||
return args.repository, args.branch, args.commit_id, args.commit_msg, generate_csv, args.csv_output_dir
|
||||
|
||||
|
||||
def import_from_path(module_name, file_path):
|
||||
try:
|
||||
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
except Exception as e:
|
||||
raise ImportModuleException(f"failed to load python module: {e}")
|
||||
|
||||
|
||||
def create_database_connection():
|
||||
"""
|
||||
Try to create a database connection. Returns None if connection fails.
|
||||
"""
|
||||
if not PSYCOPG2_AVAILABLE:
|
||||
logger.warning("psycopg2 not available - running in CSV-only mode")
|
||||
return None
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
|
||||
conn = psycopg2.connect("dbname=metrics")
|
||||
logger.info("Successfully connected to database")
|
||||
return conn
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode")
|
||||
return None
|
||||
|
||||
|
||||
def create_global_metrics_recorder(
|
||||
repository: str, branch: str, commit_id: str, commit_msg: str, generate_csv: bool = False
|
||||
) -> MetricsRecorder:
|
||||
"""
|
||||
Create a global metrics recorder that will be used across all benchmarks.
|
||||
"""
|
||||
connection = create_database_connection()
|
||||
recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)
|
||||
|
||||
# Log the storage mode
|
||||
storage_modes = []
|
||||
if connection is not None:
|
||||
storage_modes.append("database")
|
||||
if generate_csv:
|
||||
storage_modes.append("CSV")
|
||||
|
||||
if not storage_modes:
|
||||
logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
|
||||
logger.warning("Use --csv flag to enable CSV output when database is unavailable")
|
||||
else:
|
||||
logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
|
||||
|
||||
return recorder
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
|
||||
benches_folder_path = os.path.join(benchmarks_folder_path, "benches")
|
||||
|
||||
repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()
|
||||
|
||||
# Create a global metrics recorder
|
||||
global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)
|
||||
|
||||
successful_benchmarks = 0
|
||||
failed_benchmarks = 0
|
||||
|
||||
# Automatically discover all benchmark modules in benches/ folder
|
||||
benchmark_modules = []
|
||||
|
||||
if os.path.exists(benches_folder_path):
|
||||
logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
|
||||
for entry in os.scandir(benches_folder_path):
|
||||
if not entry.name.endswith(".py"):
|
||||
continue
|
||||
if entry.name.startswith("__"): # Skip __init__.py, __pycache__, etc.
|
||||
continue
|
||||
|
||||
# Check if the file has a run_benchmark function
|
||||
try:
|
||||
logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
|
||||
module = import_from_path(entry.name.split(".")[0], entry.path)
|
||||
if hasattr(module, "run_benchmark"):
|
||||
benchmark_modules.append(entry.name)
|
||||
logger.debug(f"discovered benchmark: {entry.name}")
|
||||
else:
|
||||
logger.debug(f"skipping {entry.name} - no run_benchmark function found")
|
||||
except Exception as e:
|
||||
logger.debug(f"failed to check benches/{entry.name}: {e}")
|
||||
else:
|
||||
logger.warning(f"Benches directory not found: {benches_folder_path}")
|
||||
|
||||
if benchmark_modules:
|
||||
logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}")
|
||||
else:
|
||||
logger.warning("No benchmark modules found in benches/ directory")
|
||||
|
||||
for module_name in benchmark_modules:
|
||||
module_path = os.path.join(benches_folder_path, module_name)
|
||||
try:
|
||||
logger.debug(f"loading: {module_name}")
|
||||
module = import_from_path(module_name.split(".")[0], module_path)
|
||||
logger.info(f"running benchmarks in: {module_name}")
|
||||
|
||||
# Check if the module has an updated run_benchmark function that accepts metrics_recorder
|
||||
try:
|
||||
# Try the new signature first
|
||||
module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
|
||||
except TypeError:
|
||||
# Fall back to the old signature for backward compatibility
|
||||
logger.warning(
|
||||
f"Module {module_name} using old run_benchmark signature - database connection will be created per module"
|
||||
)
|
||||
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
|
||||
|
||||
successful_benchmarks += 1
|
||||
except ImportModuleException as e:
|
||||
logger.error(e)
|
||||
failed_benchmarks += 1
|
||||
except Exception as e:
|
||||
logger.error(f"error running benchmarks for {module_name}: {e}")
|
||||
failed_benchmarks += 1
|
||||
|
||||
# Export CSV results at the end (if enabled)
|
||||
try:
|
||||
if generate_csv:
|
||||
global_metrics_recorder.export_to_csv(csv_output_dir)
|
||||
logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
|
||||
else:
|
||||
logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
|
||||
|
||||
logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to export CSV results: {e}")
|
||||
finally:
|
||||
global_metrics_recorder.close()
|
||||
57
transformers/benchmark/config/generation.yaml
Normal file
57
transformers/benchmark/config/generation.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
defaults:
|
||||
- benchmark # inheriting benchmark schema
|
||||
- scenario: inference
|
||||
- launcher: process
|
||||
- backend: pytorch
|
||||
- _self_ # for hydra 1.1 compatibility
|
||||
|
||||
name: pytorch_generate
|
||||
|
||||
launcher:
|
||||
start_method: spawn
|
||||
device_isolation: true
|
||||
device_isolation_action: warn
|
||||
|
||||
backend:
|
||||
device: cuda
|
||||
device_ids: 0
|
||||
no_weights: true
|
||||
model: meta-llama/Llama-2-7b-hf
|
||||
cache_implementation: static
|
||||
torch_compile: true
|
||||
dtype: float16
|
||||
torch_compile_config:
|
||||
backend: inductor
|
||||
mode: reduce-overhead
|
||||
fullgraph: true
|
||||
|
||||
scenario:
|
||||
input_shapes:
|
||||
batch_size: 1
|
||||
sequence_length: 7
|
||||
generate_kwargs:
|
||||
max_new_tokens: 128
|
||||
min_new_tokens: 128
|
||||
do_sample: false
|
||||
memory: true
|
||||
latency: true
|
||||
iterations: 2
|
||||
duration: 0
|
||||
|
||||
|
||||
# hydra/cli specific settings
|
||||
hydra:
|
||||
run:
|
||||
# where to store run results
|
||||
dir: runs/${name}
|
||||
job:
|
||||
# change working directory to the run directory
|
||||
chdir: true
|
||||
env_set:
|
||||
# set environment variable OVERRIDE_BENCHMARKS to 1
|
||||
# to not skip benchmarks that have been run before
|
||||
OVERRIDE_BENCHMARKS: 1
|
||||
LOG_LEVEL: WARN
|
||||
sweep:
|
||||
dir: multirun
|
||||
subdir: ${hydra.job.override_dirname}
|
||||
10
transformers/benchmark/default.yml
Normal file
10
transformers/benchmark/default.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'Transformers Benchmarks'
|
||||
orgId: 1
|
||||
type: file
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /etc/grafana/dashboards
|
||||
2375
transformers/benchmark/grafana_dashboard.json
Normal file
2375
transformers/benchmark/grafana_dashboard.json
Normal file
File diff suppressed because it is too large
Load Diff
17
transformers/benchmark/grafana_datasource.yaml
Normal file
17
transformers/benchmark/grafana_datasource.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: grafana-postgresql-datasource
|
||||
uid: be28nkzirtb0gd
|
||||
type: postgres
|
||||
url: $GRAFANA_POSTGRES_DATASOURCE_URL
|
||||
user: $GRAFANA_POSTGRES_DATASOURCE_USER
|
||||
secureJsonData:
|
||||
password: $GRAFANA_POSTGRES_DATASOURCE_PWD
|
||||
jsonData:
|
||||
database: metrics
|
||||
maxOpenConns: 100
|
||||
maxIdleConns: 100
|
||||
maxIdleConnsAuto: true
|
||||
connMaxLifetime: 14400
|
||||
postgresVersion: 1000
|
||||
timescaledb: false
|
||||
20
transformers/benchmark/optimum_benchmark_wrapper.py
Normal file
20
transformers/benchmark/optimum_benchmark_wrapper.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import argparse
|
||||
import subprocess
|
||||
|
||||
|
||||
def main(config_dir, config_name, args):
|
||||
subprocess.run(
|
||||
["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"]
|
||||
+ ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"]
|
||||
+ args
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
|
||||
parser.add_argument("--config-name", type=str, required=True, help="The config name.")
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
main(args.config_dir, args.config_name, unknown)
|
||||
6
transformers/benchmark/requirements.txt
Normal file
6
transformers/benchmark/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
gpustat==1.1.1
|
||||
psutil==6.0.0
|
||||
psycopg2==2.9.9
|
||||
torch>=2.4.0
|
||||
hf_transfer
|
||||
pandas>=1.5.0
|
||||
0
transformers/benchmark/utils/init_db.sql
Normal file
0
transformers/benchmark/utils/init_db.sql
Normal file
Reference in New Issue
Block a user