Minor tool for comparison of benchmark results (#7974)
This commit is contained in:
@@ -15,6 +15,7 @@ import unittest
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Awaitable, Callable, List, Optional, Tuple
|
||||
|
||||
@@ -27,6 +28,7 @@ from sglang.bench_serving import run_benchmark
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.backend.openai import OpenAI
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.lang.interpreter import ProgramState
|
||||
from sglang.srt.utils import (
|
||||
get_bool_env_var,
|
||||
get_device,
|
||||
@@ -348,6 +350,7 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
|
||||
help="Device type (auto/cuda/rocm/cpu). Auto will detect available platforms",
|
||||
)
|
||||
parser.add_argument("--result-file", type=str, default="result.jsonl")
|
||||
parser.add_argument("--raw-result-file", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
@@ -1309,3 +1312,35 @@ class CustomTestCase(unittest.TestCase):
|
||||
lambda: super(CustomTestCase, self)._callTestMethod(method),
|
||||
max_retry=max_retry,
|
||||
)
|
||||
|
||||
|
||||
def dump_bench_raw_result(
    path: str,
    states,
    preds,
    labels,
):
    """Save one JSON record per benchmark prompt to ``path``.

    Nothing is written when ``path`` is falsy (empty string or ``None``).

    Each record holds:
      * ``prompt_id`` - position of the state inside ``states``;
      * ``prompt``    - ``state.text()`` with the generated answer stripped
                        from its end;
      * ``output``    - ``state["answer"]``;
      * ``correct``   - whether ``preds[i] == labels[i]``.

    Records are serialized with ``json.dumps`` and joined with newlines
    (no trailing newline).

    Args:
        path: Destination file; falsy disables dumping.
        states: Indexable collection whose items support ``item["answer"]``
            and ``item.text()`` (presumably sglang program states - confirm
            against callers).
        preds: Predictions, indexable in parallel with ``states``.
        labels: Ground-truth values, indexable in parallel with ``states``.
    """
    if not path:
        return

    def _make_record(index):
        # Build the row for a single prompt; key order defines the JSON layout.
        program_state = states[index]
        answer = program_state["answer"]
        return {
            "prompt_id": index,
            "prompt": _ensure_remove_suffix(program_state.text(), answer),
            "output": answer,
            # bool() normalizes e.g. numpy booleans so json can encode them.
            "correct": bool(preds[index] == labels[index]),
        }

    records = [_make_record(index) for index in range(len(states))]

    print(f"BenchRawResultDumper save results to {path}")
    target = Path(path)
    serialized = "\n".join(json.dumps(record) for record in records)
    target.write_text(serialized)
|
||||
|
||||
|
||||
def _ensure_remove_suffix(text: str, suffix: str):
|
||||
assert text.endswith(suffix)
|
||||
return text.removesuffix(suffix)
|
||||
|
||||
Reference in New Issue
Block a user