Minor tool for comparison of benchmark results (#7974)
This commit is contained in:
@@ -10,6 +10,7 @@ import numpy as np
|
||||
from sglang.api import set_default_backend
|
||||
from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
dump_bench_raw_result,
|
||||
select_sglang_backend,
|
||||
)
|
||||
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
||||
@@ -115,6 +116,12 @@ def main(args):
|
||||
|
||||
# Dump results
|
||||
dump_state_text(f"tmp_output_{args.backend}.txt", states)
|
||||
dump_bench_raw_result(
|
||||
path=args.raw_result_file,
|
||||
states=states,
|
||||
preds=preds,
|
||||
labels=labels,
|
||||
)
|
||||
|
||||
with open(args.result_file, "a") as fout:
|
||||
value = {
|
||||
|
||||
@@ -9,6 +9,7 @@ import tiktoken
|
||||
|
||||
from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
dump_bench_raw_result,
|
||||
select_sglang_backend,
|
||||
)
|
||||
|
||||
@@ -142,6 +143,13 @@ def main(args):
|
||||
assert pt == len(cors)
|
||||
weighted_acc = np.mean(cors)
|
||||
|
||||
dump_bench_raw_result(
|
||||
path=args.raw_result_file,
|
||||
states=states,
|
||||
preds=preds,
|
||||
labels=labels,
|
||||
)
|
||||
|
||||
# Print results
|
||||
print("Total latency: {:.3f}".format(latency))
|
||||
print("Average accuracy: {:.3f}".format(weighted_acc))
|
||||
|
||||
Reference in New Issue
Block a user