"""Parse vacc log files and generate per-process trace files."""
import argparse
|
|
import os
|
|
from collections import defaultdict
|
|
from multiprocessing import Pool
|
|
|
|
|
|
# Marker searched for in every log line; the text that follows the first
# occurrence of this marker is what gets copied into the trace file.
log_tag = "LOG_TRACE:"

# (tid, display name) pairs. One "thread_name" metadata record is written per
# pair at the top of every generated trace file.
tid_names = [
    (0, "module"),
    (1, "megatron"),
    (2, "deepspeed"),
    (10, "vacc-odsp"),
    (11, "vacc-dlc"),
    (12, "vacc-vccl"),
    (13, "vacc-cpu"),
    (14, "vacc-cpu_fallback"),
    (15, "vacc-ddr"),
    (20, "lib-vccl"),
]
|
|
|
|
|
|
def parse_files_of_process(args):
    """Write ``trace_<pid>.json`` from the tagged lines of one process's logs.

    The output file is created relative to the current working directory. It
    starts with ``[``, followed by one ``thread_name`` metadata record per
    entry of ``tid_names``, then one line per tagged log line, and ends
    with ``]``.

    Args:
        args: A ``(pid, in_files)`` pair packed into a single argument so the
            function can be used with ``Pool.map``. ``pid`` is a string (it is
            concatenated into the output file name) and ``in_files`` is an
            iterable of log file paths, processed in the given order.
    """
    pid, in_files = args
    out_file = "trace_" + pid + ".json"

    with open(out_file, "w", encoding="utf-8") as trace:
        # One metadata record per known tid, each terminated by a comma.
        header = "\n".join(
            f'{{"name": "thread_name","ph": "M","pid": {pid},"tid": {tid},"args": {{"name": "{name}"}}}},'
            for tid, name in tid_names
        )
        trace.write("[\n")
        trace.write(header)
        trace.write("\n")

        for log_path in in_files:
            with open(log_path, "r", encoding="utf-8") as log:
                for raw_line in log:
                    if log_tag not in raw_line:
                        continue
                    # Keep only what follows the first occurrence of the tag.
                    payload = raw_line.split(log_tag, 1)[1].strip()
                    trace.write(payload + "\n")

        trace.write("]")
|
|
|
|
|
|
def parse_directory(directory):
    """Convert the non-empty ``vacc*`` logs under *directory* into trace files.

    The directory tree is walked recursively. Files whose name starts with
    ``vacc`` and whose size is non-zero are grouped by the process id encoded
    in the file name: the text between the last ``_`` and the next ``.``
    (for example ``vacc_log_1234.txt`` yields ``"1234"``). Every group is then
    passed to ``parse_files_of_process`` through a worker pool, one task per
    process id.

    Files whose name contains no ``_`` or yields an empty id are skipped, so a
    stray ``vacc*`` file cannot abort the whole run. If nothing matches, the
    function returns without starting a worker pool.

    Args:
        directory: Root directory to search for log files.
    """
    pro_files = defaultdict(list)
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            if not filename.startswith("vacc"):
                continue
            file_path = os.path.join(dirpath, filename)
            if os.path.getsize(file_path) == 0:
                continue
            # The id sits after the last underscore, before the extension.
            _, sep, tail = filename.rpartition("_")
            pid = tail.split(".", 1)[0]
            if not sep or not pid:
                # No id can be derived from this name; ignore the file.
                continue
            pro_files[pid].append(file_path)

    if not pro_files:
        # Nothing to convert; avoid spawning worker processes for no work.
        return

    tasks = list(pro_files.items())
    with Pool() as pool:
        pool.map(parse_files_of_process, tasks)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: takes the log directory as the single
    # positional argument and converts everything found beneath it.
    arg_parser = argparse.ArgumentParser(
        description="parse vacc log files and generate trace files"
    )
    arg_parser.add_argument("directory", type=str, help="log directory to parse")
    cli_args = arg_parser.parse_args()
    parse_directory(cli_args.directory)
|