Minor tool for comparison of benchmark results (#7974)
This commit is contained in:
@@ -9,6 +9,7 @@ import tiktoken

from sglang.test.test_utils import (
    add_common_sglang_args_and_parse,
    dump_bench_raw_result,
    select_sglang_backend,
)
@@ -142,6 +143,13 @@ def main(args):
    assert pt == len(cors)
    weighted_acc = np.mean(cors)

    dump_bench_raw_result(
        path=args.raw_result_file,
        states=states,
        preds=preds,
        labels=labels,
    )

    # Print results
    print("Total latency: {:.3f}".format(latency))
    print("Average accuracy: {:.3f}".format(weighted_acc))
Reference in New Issue
Block a user