Fix warmup in bench_offline_throughput.py (#2449)
This commit is contained in:
@@ -201,18 +201,17 @@ def throughput_test_once(
         for r in reqs
     ]
 
-    st = time.perf_counter()
     if profile:
         backend.start_profile()
 
+    st = time.perf_counter()
     gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
+    latency = time.perf_counter() - st
 
     if profile:
         backend.stop_profile()
         monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
 
-    latency = time.perf_counter() - st
-
     if backend_name == "runtime":
         gen_out = json.loads(gen_out)
 
@@ -304,8 +303,8 @@ def throughput_test(
     warmup_requests = sample_random_requests(
         input_len=256,
         output_len=16,
-        num_prompts=16,
-        range_ratio=0.8,
+        num_prompts=min(bench_args.num_prompts, 16),
+        range_ratio=1.0,
         tokenizer=tokenizer,
         dataset_path=bench_args.dataset_path,
     )
@@ -321,6 +320,7 @@ def throughput_test(
         extra_request_body=extra_request_body,
         profile=False,
     )
+    time.sleep(0.5)
 
     logging.info("\nBenchmark...")
     result = throughput_test_once(
Reference in New Issue
Block a user