import argparse
import os
from collections import defaultdict
from multiprocessing import Pool

# Marker that prefixes every trace payload inside the raw log lines.
log_tag = "LOG_TRACE:"

# (tid, display name) pairs used to label trace lanes in the viewer.
tid_names = [
    (0, "module"),
    (1, "megatron"),
    (2, "deepspeed"),
    (10, "vacc-odsp"),
    (11, "vacc-dlc"),
    (12, "vacc-vccl"),
    (13, "vacc-cpu"),
    (14, "vacc-cpu_fallback"),
    (15, "vacc-ddr"),
    (20, "lib-vccl"),
]


def parse_files_of_process(args):
    """Merge the LOG_TRACE payloads of one process into ``trace_<pid>.json``.

    Args:
        args: a ``(pid, in_files)`` tuple — ``pid`` is the process id as a
            string and ``in_files`` is the list of log file paths belonging
            to that process.  Packed into one tuple so the function can be
            dispatched via ``Pool.map``.

    Writes ``trace_<pid>.json`` into the current working directory: a
    trace-event JSON array opening with thread-name metadata events,
    followed by every payload found after ``log_tag`` in the input files,
    one per line.

    NOTE(review): the emitted array keeps whatever trailing commas the log
    writer produced — presumably each payload line already ends with ","
    as the Chrome trace viewer tolerates; strict JSON parsers may not.
    """
    pid, in_files = args
    out_file = f"trace_{pid}.json"
    with open(out_file, "w", encoding="utf-8") as new_file:
        # Thread-name metadata so the viewer shows readable lane labels
        # instead of bare numeric tids.
        metadata_lines = [
            f'{{"name": "thread_name","ph": "M","pid": {pid},"tid": {tid},"args": {{"name": "{name}"}}}},'
            for tid, name in tid_names
        ]
        new_file.write("[\n")
        new_file.write("\n".join(metadata_lines))
        new_file.write("\n")
        for file_path in in_files:
            with open(file_path, "r", encoding="utf-8") as log_file:
                for line in log_file:
                    # Copy only the payload after the marker; other log
                    # lines are ignored.
                    if log_tag in line:
                        new_line = line.split(log_tag, 1)[1].strip()
                        new_file.write(new_line + "\n")
        new_file.write("]")


def parse_directory(directory):
    """Scan *directory* recursively and build one trace file per process.

    Non-empty files whose names start with ``vacc`` are grouped by the pid
    embedded in the filename (expected layout ``<prefix>_<pid>.<ext>``);
    each group is then converted by :func:`parse_files_of_process` in a
    separate worker process.
    """
    pro_files = defaultdict(list)
    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            if not filename.startswith("vacc") or os.path.getsize(file_path) == 0:
                continue
            # Guard against names like "vacc.log": rsplit("_", 1) would
            # yield a single element and indexing [1] would raise.
            if "_" not in filename:
                continue
            pid = filename.rsplit("_", 1)[1].split(".")[0]
            pro_files[pid].append(file_path)
    # Each (pid, files) pair is handed to one pool worker.
    with Pool() as p:
        p.map(parse_files_of_process, list(pro_files.items()))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="parse vacc log files and generate trace files"
    )
    parser.add_argument("directory", type=str, help="log directory to parse")
    args = parser.parse_args()
    parse_directory(args.directory)