This commit is contained in:
2026-04-02 04:53:13 +00:00
parent 80932c96e5
commit 24df76db9d
1987 changed files with 447445 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
import argparse
import os
from collections import defaultdict
from multiprocessing import Pool
# Marker searched for in every log line; the text following its first
# occurrence is copied into the generated trace file.
log_tag = "LOG_TRACE:"

# (tid, name) pairs. parse_files_of_process emits one "thread_name" metadata
# record per pair at the top of each trace file.
tid_names = [
    (0, "module"),
    (1, "megatron"),
    (2, "deepspeed"),
    (10, "vacc-odsp"),
    (11, "vacc-dlc"),
    (12, "vacc-vccl"),
    (13, "vacc-cpu"),
    (14, "vacc-cpu_fallback"),
    (15, "vacc-ddr"),
    (20, "lib-vccl"),
]
def parse_files_of_process(args):
    """Merge the trace lines of one process into ``trace_<pid>.json``.

    The output file is created in the current working directory. It contains,
    in order: a ``[`` line, one ``thread_name`` metadata record per entry of
    ``tid_names``, the payload of every input line that contains ``log_tag``
    (the text after the first occurrence of the tag, with surrounding
    whitespace stripped, one payload per line), and a closing ``]``.

    Args:
        args: A ``(pid, in_files)`` pair, packed into a single argument so the
            function can be handed to ``Pool.map``. ``pid`` is the process id
            as a string; ``in_files`` is an iterable of log file paths.
    """
    pid, in_files = args
    out_file = "trace_" + pid + ".json"

    # pid is interpolated unquoted, so it has to be numeric for the record to
    # be well-formed JSON.
    metadata_lines = [
        f'{{"name": "thread_name","ph": "M","pid": {pid},"tid": {tid},"args": {{"name": "{name}"}}}},'
        for tid, name in tid_names
    ]

    with open(out_file, "w", encoding="utf-8") as new_file:
        new_file.write("[\n")
        new_file.write("\n".join(metadata_lines))
        new_file.write("\n")
        for file_path in in_files:
            # errors="replace": a stray non-UTF-8 byte in a log must not abort
            # the conversion of the whole process (and, via Pool.map, the
            # whole run) and leave a truncated trace file behind.
            with open(
                file_path, "r", encoding="utf-8", errors="replace"
            ) as file:
                for line in file:
                    # Single scan: `found` is empty when the tag is absent.
                    _, found, payload = line.partition(log_tag)
                    if found:
                        new_file.write(payload.strip() + "\n")
        # NOTE(review): every metadata record ends with a comma and payloads
        # are copied verbatim, so whether the result is strict JSON depends on
        # the payload format - confirm the trace viewer in use accepts it.
        new_file.write("]")
def parse_directory(directory):
    """Convert every ``vacc*`` log under *directory* into per-process traces.

    The directory tree is walked recursively. Non-empty files whose name
    starts with ``vacc`` are grouped by pid, taken from the file name as the
    text between the last ``_`` and the next ``.``. Each group is then passed
    to ``parse_files_of_process`` in a ``multiprocessing.Pool``.

    Files that start with ``vacc`` but carry no ``_<pid>`` part are skipped
    instead of raising ``IndexError``. When no file qualifies the function
    returns without starting a worker pool.

    Args:
        directory: Root directory to search for log files.
    """
    pro_files = defaultdict(list)
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            if not filename.startswith("vacc"):
                continue
            # rpartition never raises: `sep` is empty when there is no "_".
            _, sep, suffix = filename.rpartition("_")
            pid = suffix.split(".")[0]
            if not sep or not pid:
                continue
            file_path = os.path.join(dirpath, filename)
            if os.path.getsize(file_path) != 0:
                pro_files[pid].append(file_path)

    if not pro_files:
        return

    # os.walk order depends on the filesystem; sort each group so the
    # generated trace files are reproducible between runs.
    args = [(pid, sorted(in_files)) for pid, in_files in pro_files.items()]
    with Pool() as p:
        p.map(parse_files_of_process, args)
if __name__ == "__main__":
    # Command-line entry point: the single positional argument is the
    # directory that is handed to parse_directory.
    cli = argparse.ArgumentParser(
        description="parse vacc log files and generate trace files"
    )
    cli.add_argument("directory", type=str, help="log directory to parse")
    parse_directory(cli.parse_args().directory)