Benchmark Updates (#382)
This commit is contained in:
@@ -73,7 +73,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = few_shot_gsm8k.run_batch(
|
states = few_shot_gsm8k.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
preds = []
|
preds = []
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
rets = few_shot_hellaswag.run_batch(
|
rets = few_shot_hellaswag.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
preds = [choices[i].index(rets[i]["answer"]) for i in range(len(rets))]
|
preds = [choices[i].index(rets[i]["answer"]) for i in range(len(rets))]
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ def main(args):
|
|||||||
|
|
||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = json_decode.run_batch(arguments, temperature=0, num_threads=args.parallel)
|
states = json_decode.run_batch(arguments, temperature=0, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
# Compute accuracy
|
# Compute accuracy
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ def bench_city_doc(args):
|
|||||||
arguments,
|
arguments,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
num_threads=args.parallel,
|
num_threads=args.parallel,
|
||||||
progress_bar=(args.parallel == 1),
|
progress_bar=True,
|
||||||
)
|
)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ def bench_character(args):
|
|||||||
arguments,
|
arguments,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
num_threads=args.parallel,
|
num_threads=args.parallel,
|
||||||
progress_bar=(args.parallel == 1),
|
progress_bar=True,
|
||||||
)
|
)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ def eval_model(args, line_obj, num_hoops, src_indices, dst_percents):
|
|||||||
|
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = line_retrieval.run_batch(
|
states = line_retrieval.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
corrects = []
|
corrects = []
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = multi_dimension_judge.run_batch(
|
states = multi_dimension_judge.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
print(f"Latency: {latency:.3f}")
|
print(f"Latency: {latency:.3f}")
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = json_decode.run_batch(
|
states = json_decode.run_batch(
|
||||||
arguments, temperature=0, num_threads=args.parallel)
|
arguments, temperature=0, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
# Compute accuracy
|
# Compute accuracy
|
||||||
|
|||||||
@@ -60,7 +60,9 @@ def main(args):
|
|||||||
arguments,
|
arguments,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
max_new_tokens=256,
|
max_new_tokens=256,
|
||||||
num_threads=args.parallel)
|
num_threads=args.parallel,
|
||||||
|
progress_bar=True,
|
||||||
|
)
|
||||||
answers = [[s["answer_1"], s["answer_2"]] for s in rets]
|
answers = [[s["answer_1"], s["answer_2"]] for s in rets]
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ wget https://raw.githubusercontent.com/openai/grade-school-math/master/grade_sch
|
|||||||
|
|
||||||
### Benchmark sglang
|
### Benchmark sglang
|
||||||
```
|
```
|
||||||
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --schedule-conservativeness 1.3
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = multi_chain_gsm8k.run_batch(
|
states = multi_chain_gsm8k.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
preds = []
|
preds = []
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = multi_document_qa.run_batch(
|
states = multi_document_qa.run_batch(
|
||||||
arguments, temperature=0, num_threads=args.parallel)
|
arguments, temperature=0, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
# Compute accuracy
|
# Compute accuracy
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ def main(args):
|
|||||||
|
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = multi_turns.run_batch(
|
states = multi_turns.run_batch(
|
||||||
multi_qas, temperature=0, backend=backend, num_threads=args.parallel
|
multi_qas, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True
|
||||||
)
|
)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
|
|||||||
@@ -110,7 +110,9 @@ def main(args):
|
|||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = webthink.run_batch(arguments,
|
states = webthink.run_batch(arguments,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
num_threads=args.parallel)
|
num_threads=args.parallel,
|
||||||
|
progress_bar=True,
|
||||||
|
)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
# Compute accuracy
|
# Compute accuracy
|
||||||
|
|||||||
1
benchmark/tip_suggestion/.gitignore
vendored
Normal file
1
benchmark/tip_suggestion/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
!topic.jsonl
|
||||||
@@ -59,7 +59,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = suggest_tips.run_batch(
|
states = suggest_tips.run_batch(
|
||||||
arguments, temperature=0, num_threads=args.parallel)
|
arguments, temperature=0, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
|
|
||||||
# Compute accuracy
|
# Compute accuracy
|
||||||
|
|||||||
50
benchmark/tip_suggestion/topic.jsonl
Normal file
50
benchmark/tip_suggestion/topic.jsonl
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
{"topic": "organizing a successful charity event", "number": 6}
|
||||||
|
{"topic": "improving personal credit scores", "number": 7}
|
||||||
|
{"topic": "staying motivated during job searches", "number": 5}
|
||||||
|
{"topic": "maintaining a work-life balance", "number": 9}
|
||||||
|
{"topic": "reducing carbon footprint at home", "number": 8}
|
||||||
|
{"topic": "starting a book club", "number": 5}
|
||||||
|
{"topic": "learning to play a musical instrument", "number": 7}
|
||||||
|
{"topic": "getting into freelance writing", "number": 6}
|
||||||
|
{"topic": "beginner yoga poses", "number": 8}
|
||||||
|
{"topic": "preparing for graduate school exams", "number": 5}
|
||||||
|
{"topic": "exploring minimalist living", "number": 9}
|
||||||
|
{"topic": "effective grocery shopping", "number": 7}
|
||||||
|
{"topic": "winter camping", "number": 5}
|
||||||
|
{"topic": "starting a podcast on a budget", "number": 8}
|
||||||
|
{"topic": "creating a capsule wardrobe", "number": 6}
|
||||||
|
{"topic": "improving your writing skills", "number": 7}
|
||||||
|
{"topic": "learning a new software quickly", "number": 9}
|
||||||
|
{"topic": "reducing anxiety before public speaking", "number": 5}
|
||||||
|
{"topic": "planning a solo travel adventure", "number": 8}
|
||||||
|
{"topic": "beginner skateboarders", "number": 6}
|
||||||
|
{"topic": "studying abroad", "number": 7}
|
||||||
|
{"topic": "planting a vegetable garden", "number": 5}
|
||||||
|
{"topic": "adopting a shelter pet", "number": 9}
|
||||||
|
{"topic": "learning to cook ethnic cuisines", "number": 8}
|
||||||
|
{"topic": "effective conflict resolution", "number": 5}
|
||||||
|
{"topic": "starting a vlog", "number": 7}
|
||||||
|
{"topic": "keeping a daily journal", "number": 6}
|
||||||
|
{"topic": "improving sleep hygiene", "number": 8}
|
||||||
|
{"topic": "beginner mountain climbers", "number": 5}
|
||||||
|
{"topic": "creating a mobile app", "number": 9}
|
||||||
|
{"topic": "maintaining a saltwater aquarium", "number": 7}
|
||||||
|
{"topic": "preparing for a baby's arrival", "number": 6}
|
||||||
|
{"topic": "writing a fantasy novel", "number": 5}
|
||||||
|
{"topic": "effective team leadership", "number": 8}
|
||||||
|
{"topic": "making a documentary film", "number": 9}
|
||||||
|
{"topic": "learning about historical events", "number": 7}
|
||||||
|
{"topic": "baking gluten-free treats", "number": 6}
|
||||||
|
{"topic": "improving mental arithmetic skills", "number": 5}
|
||||||
|
{"topic": "building a treehouse", "number": 8}
|
||||||
|
{"topic": "getting started with watercolor painting", "number": 9}
|
||||||
|
{"topic": "creating a YouTube tutorial series", "number": 7}
|
||||||
|
{"topic": "landscape photography", "number": 5}
|
||||||
|
{"topic": "navigating cultural differences", "number": 6}
|
||||||
|
{"topic": "preparing for a marathon", "number": 8}
|
||||||
|
{"topic": "building an online business", "number": 9}
|
||||||
|
{"topic": "learning to dance at home", "number": 5}
|
||||||
|
{"topic": "self-publishing a book", "number": 7}
|
||||||
|
{"topic": "starting an urban farm", "number": 6}
|
||||||
|
{"topic": "improving your memory", "number": 8}
|
||||||
|
{"topic": "creating a personal brand online", "number": 9}
|
||||||
@@ -112,7 +112,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = tree_search.run_batch(
|
states = tree_search.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
answers_text = []
|
answers_text = []
|
||||||
for s in states:
|
for s in states:
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ def main(args):
|
|||||||
# Run requests
|
# Run requests
|
||||||
tic = time.time()
|
tic = time.time()
|
||||||
states = tree_search.run_batch(
|
states = tree_search.run_batch(
|
||||||
arguments, temperature=0, backend=backend, num_threads=args.parallel)
|
arguments, temperature=0, backend=backend, num_threads=args.parallel, progress_bar=True)
|
||||||
latency = time.time() - tic
|
latency = time.time() - tic
|
||||||
answers_text = []
|
answers_text = []
|
||||||
for s in states:
|
for s in states:
|
||||||
|
|||||||
Reference in New Issue
Block a user