Fix warmup in bench_offline_throughput.py (#2449)

2024-12-11 06:16:01 -08:00
parent 959735fc9e
commit f8548295d6
1 changed file with 5 additions and 5 deletions
--- a/python/sglang/bench_offline_throughput.py
+++ b/python/sglang/bench_offline_throughput.py
@@ -201,18 +201,17 @@ def throughput_test_once(
        for r in reqs
    ]

-    st = time.perf_counter()
    if profile:
        backend.start_profile()

+    st = time.perf_counter()
    gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
+    latency = time.perf_counter() - st

    if profile:
        backend.stop_profile()
        monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))

-    latency = time.perf_counter() - st
-
    if backend_name == "runtime":
        gen_out = json.loads(gen_out)

@@ -304,8 +303,8 @@ def throughput_test(
    warmup_requests = sample_random_requests(
        input_len=256,
        output_len=16,
-        num_prompts=16,
-        range_ratio=0.8,
+        num_prompts=min(bench_args.num_prompts, 16),
+        range_ratio=1.0,
        tokenizer=tokenizer,
        dataset_path=bench_args.dataset_path,
    )
@@ -321,6 +320,7 @@ def throughput_test(
            extra_request_body=extra_request_body,
            profile=False,
        )
+        time.sleep(0.5)

    logging.info("\nBenchmark...")
    result = throughput_test_once(