Minor tool for comparison of benchmark results (#7974)

This commit is contained in:
fzyzcjy
2025-07-27 15:27:50 +08:00
committed by GitHub
parent ed0fdbf35b
commit 62222bd27e
4 changed files with 222 additions and 0 deletions

View File

@@ -10,6 +10,7 @@ import numpy as np
from sglang.api import set_default_backend
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
dump_bench_raw_result,
select_sglang_backend,
)
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
@@ -115,6 +116,12 @@ def main(args):
# Dump results
dump_state_text(f"tmp_output_{args.backend}.txt", states)
dump_bench_raw_result(
path=args.raw_result_file,
states=states,
preds=preds,
labels=labels,
)
with open(args.result_file, "a") as fout:
value = {

View File

@@ -9,6 +9,7 @@ import tiktoken
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
dump_bench_raw_result,
select_sglang_backend,
)
@@ -142,6 +143,13 @@ def main(args):
assert pt == len(cors)
weighted_acc = np.mean(cors)
dump_bench_raw_result(
path=args.raw_result_file,
states=states,
preds=preds,
labels=labels,
)
# Print results
print("Total latency: {:.3f}".format(latency))
print("Average accuracy: {:.3f}".format(weighted_acc))