Minor tool for comparison of benchmark results (#7974)

This commit is contained in:
fzyzcjy
2025-07-27 15:27:50 +08:00
committed by GitHub
parent ed0fdbf35b
commit 62222bd27e
4 changed files with 222 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ import tiktoken
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
dump_bench_raw_result,
select_sglang_backend,
)
@@ -142,6 +143,13 @@ def main(args):
assert pt == len(cors)
weighted_acc = np.mean(cors)
dump_bench_raw_result(
path=args.raw_result_file,
states=states,
preds=preds,
labels=labels,
)
# Print results
print("Total latency: {:.3f}".format(latency))
print("Average accuracy: {:.3f}".format(weighted_acc))