[benchmark] Add benchmark scripts for ceval and boolq (#8946)
Co-authored-by: chenyuxing <2818499974@qq.com> Co-authored-by: hanqing <huang010706@126.com> Co-authored-by: Muggle <62579327+trawolf@users.noreply.github.com> Co-authored-by: ronnie_zheng <zl19940307@163.com>
This commit is contained in:
28
benchmark/boolq/convert_parquet_to_json.py
Normal file
28
benchmark/boolq/convert_parquet_to_json.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import sys
|
||||
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
|
||||
def convert_parquet_to_json(input_file, output_file):
    """Convert a Parquet file into a line-delimited JSON text file.

    Args:
        input_file: Path of the Parquet file to read.
        output_file: Path of the file that receives the JSON text. It is
            opened in "w" mode, so any existing content is replaced.
    """
    # Load the Parquet table and convert it to a pandas DataFrame.
    frame = pq.read_table(input_file).to_pandas()

    # Serialize before touching the output file, so a failure here leaves
    # any existing output untouched. orient="records" with lines=True
    # yields one JSON record per line.
    serialized = frame.to_json(orient="records", lines=True)

    with open(output_file, "w") as out:
        out.write(serialized)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are expected: the Parquet input path
    # and the JSON output path.
    if len(sys.argv) != 3:
        print("Usage:python convert_parquet_to_json.py <input_file> <output_file>")
        # Stop here: falling through would raise IndexError on the argv
        # lookups below when arguments are missing, or silently run the
        # conversion when surplus arguments were given.
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    convert_parquet_to_json(input_file, output_file)
|
||||
Reference in New Issue
Block a user