Offline LLM Engine Benchmark Throughput (#1968)

Co-authored-by: ByronHsu <byronhsu1230@gmail.com>
This commit is contained in:
zolinthecow
2024-11-14 21:59:33 -08:00
committed by GitHub
parent ea53c63bad
commit f6dd648620
4 changed files with 358 additions and 31 deletions

View File

@@ -11,7 +11,9 @@ from types import SimpleNamespace
import torch
import sglang as sgl
from sglang.bench_offline_throughput import BenchArgs, throughput_test
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.server_args import ServerArgs
from sglang.test.few_shot_gsm8k_engine import run_eval
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
@@ -152,6 +154,14 @@ class TestSRTEngine(unittest.TestCase):
self.assertTrue(torch.allclose(out1, out2, atol=1e-5, rtol=1e-3))
def test_7_engine_offline_throughput(self):
server_args = ServerArgs(
model_path=DEFAULT_MODEL_NAME_FOR_TEST,
)
bench_args = BenchArgs(num_prompts=100)
result = throughput_test(server_args=server_args, bench_args=bench_args)
self.assertTrue(result["total_throughput"] > 3000)
# Allow running this test module directly (e.g. `python test_srt_engine.py`).
if __name__ == "__main__":
    unittest.main()