Format Benchmark Code (#399)

2024-04-28 21:06:22 +08:00
parent 19818b9c2f
commit 95c4e0dfac
41 changed files with 1169 additions and 608 deletions
--- a/benchmark/long_json_decode/build_dataset.py
+++ b/benchmark/long_json_decode/build_dataset.py
@@ -3,7 +3,6 @@ import json
 import transformers
 import wikipedia

-
 name = "meta-llama/Llama-2-7b-chat-hf"
 t = transformers.AutoTokenizer.from_pretrained(name)
 city_names = ["los angles", "london", "tokyo", "beijing", "singapore"]
@@ -20,7 +19,9 @@ for city_name in city_names:
    truncate_tokens = t.encode(truncate_content)

    # Count token
-    print(f"city_name: {city_name}, #tokens: {len(tokens)}, #truncate tokens: {len(truncate_tokens)}")
+    print(
+        f"city_name: {city_name}, #tokens: {len(tokens)}, #truncate tokens: {len(truncate_tokens)}"
+    )

    with open("questions.jsonl", "a") as fout:
        fout.write(json.dumps({"document": truncate_content}) + "\n")