forked from EngineX-Cambricon/enginex-mlu370-vllm
add ops
This commit is contained in:
65
torch_mlu_ops-v1.3.2/benchmarks/common.py
Normal file
65
torch_mlu_ops-v1.3.2/benchmarks/common.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import torch
|
||||
import torch_mlu
|
||||
import time
|
||||
from pathlib import Path
|
||||
import csv
|
||||
import os
|
||||
import subprocess
|
||||
from itertools import product
|
||||
|
||||
def benchmark_forward(fn, *inputs, repeats=1, **kwinputs):
|
||||
notify_start = torch.mlu.Event(enable_timing=True)
|
||||
notify_end = torch.mlu.Event(enable_timing=True)
|
||||
notify_start.record()
|
||||
t0 = time.perf_counter()
|
||||
for _ in range(repeats):
|
||||
fn(*inputs, **kwinputs)
|
||||
notify_end.record()
|
||||
notify_end.synchronize()
|
||||
total_e2e_time = time.perf_counter() - t0
|
||||
average_e2e_time = total_e2e_time / repeats * 1e6
|
||||
total_hardware_time = notify_start.hardware_time(notify_end)
|
||||
average_hardware_time = total_hardware_time / repeats
|
||||
return average_hardware_time, average_e2e_time
|
||||
|
||||
def save_to_csv(table, file_path, file_name):
|
||||
file_name_without_ext, _ = os.path.splitext(file_name)
|
||||
new_file_name = file_name_without_ext + '.csv'
|
||||
if file_path is None:
|
||||
file_path = './'
|
||||
path = Path(file_path)
|
||||
if path.suffix:
|
||||
directory = path.parent
|
||||
filename = path.name
|
||||
else:
|
||||
directory = path
|
||||
filename = new_file_name
|
||||
if not directory.exists():
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
full_path = directory / filename
|
||||
if not full_path.exists():
|
||||
full_path.touch()
|
||||
with open(full_path, mode="w", newline="") as file:
|
||||
writer = csv.writer(file)
|
||||
writer.writerows(table)
|
||||
print(f"output saved at: {full_path}")
|
||||
|
||||
def get_band_width(card_id: int = 0):
|
||||
cmd = "cnmon info -c " + str(card_id) + " | grep 'MEM BandWidth'| cut -d ':' -f2 | cut -d ' ' -f 2"
|
||||
res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
assert res.returncode == 0, "Failed to get BandWidth."
|
||||
bd = int(res.stdout.decode().strip())
|
||||
return bd
|
||||
|
||||
def generate_token_count(num_expert,
|
||||
total_token_count):
|
||||
token_count = torch.randint(low=1, high=1024, size=(num_expert, ), dtype=torch.int32).to(dtype=torch.float32)
|
||||
sum = torch.sum(token_count, dim=-1) * 1.0
|
||||
token_count *= total_token_count / sum.item()
|
||||
token_count = token_count.to(dtype=torch.int32)
|
||||
cusum_token_count = torch.zeros(num_expert + 1, dtype=torch.int32)
|
||||
end_expert_token_count = torch.ones(num_expert + 1, dtype=torch.int32) * total_token_count
|
||||
cusum_token_count[1:] = torch.cumsum(token_count, dim=-1)
|
||||
cusum_token_count = torch.minimum(cusum_token_count, end_expert_token_count)
|
||||
cusum_token_count[-1] = total_token_count
|
||||
return cusum_token_count, cusum_token_count[1:] - cusum_token_count[:-1]
|
||||
Reference in New Issue
Block a user