Fix warmup in bench_offline_throughput.py (#2449)

2024-12-11 06:16:01 -08:00
parent 959735fc9e
commit f8548295d6
1 changed file with 5 additions and 5 deletions
--- a/python/sglang/bench_offline_throughput.py
+++ b/python/sglang/bench_offline_throughput.py
@@ -201,18 +201,17 @@ def throughput_test_once(
        for r in reqs
    ]
-    st = time.perf_counter()
    if profile:
        backend.start_profile()
+    st = time.perf_counter()
    gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
+    latency = time.perf_counter() - st
    if profile:
        backend.stop_profile()
        monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
-    latency = time.perf_counter() - st
    if backend_name == "runtime":
        gen_out = json.loads(gen_out)
@@ -304,8 +303,8 @@ def throughput_test(
    warmup_requests = sample_random_requests(
        input_len=256,
        output_len=16,
-        num_prompts=16,
+        num_prompts=min(bench_args.num_prompts, 16),
-        range_ratio=0.8,
+        range_ratio=1.0,
        tokenizer=tokenizer,
        dataset_path=bench_args.dataset_path,
    )
@@ -321,6 +320,7 @@ def throughput_test(
            extra_request_body=extra_request_body,
            profile=False,
        )
+        time.sleep(0.5)
    logging.info("\nBenchmark...")
    result = throughput_test_once(