[benchmark] Add benchmark scripts for ceval and boolq (#8946)

Co-authored-by: chenyuxing <2818499974@qq.com> Co-authored-by: hanqing <huang010706@126.com> Co-authored-by: Muggle <62579327+trawolf@users.noreply.github.com> Co-authored-by: ronnie_zheng <zl19940307@163.com>
2025-08-23 15:40:15 +08:00
parent 0374304a2c
commit 4edbe0d534
6 changed files with 350 additions and 0 deletions
--- a/benchmark/boolq/convert_parquet_to_json.py
+++ b/benchmark/boolq/convert_parquet_to_json.py
@@ -0,0 +1,28 @@
+import sys
+
+import pyarrow.parquet as pq
+
+
+def convert_parquet_to_json(input_file, output_file):
+    """Convert a Parquet file into a line-delimited JSON file.
+
+    The table read from ``input_file`` is turned into a pandas DataFrame,
+    serialized with one JSON record per row, and the resulting text is
+    written to ``output_file`` (overwriting any existing content).
+    """
+    # load the parquet table and materialize it as a dataframe
+    frame = pq.read_table(input_file).to_pandas()
+    # orient="records" with lines=True emits one JSON object per line
+    serialized = frame.to_json(orient="records", lines=True)
+    with open(output_file, "w") as out:
+        out.write(serialized)
+
+
+if __name__ == "__main__":
+    # Expect exactly two arguments: the parquet input and the JSON output.
+    if len(sys.argv) != 3:
+        print("Usage:python convert_parquet_to_json.py <input_file> <output_file>")
+        # Stop here: going on would raise IndexError or ignore extra arguments.
+        sys.exit(1)
+
+    convert_parquet_to_json(sys.argv[1], sys.argv[2])