Fix warmup in bench_offline_throughput.py (#2449)
This commit is contained in:
@@ -201,18 +201,17 @@ def throughput_test_once(
|
|||||||
for r in reqs
|
for r in reqs
|
||||||
]
|
]
|
||||||
|
|
||||||
st = time.perf_counter()
|
|
||||||
if profile:
|
if profile:
|
||||||
backend.start_profile()
|
backend.start_profile()
|
||||||
|
|
||||||
|
st = time.perf_counter()
|
||||||
gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
|
gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
|
||||||
|
latency = time.perf_counter() - st
|
||||||
|
|
||||||
if profile:
|
if profile:
|
||||||
backend.stop_profile()
|
backend.stop_profile()
|
||||||
monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
|
monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
|
||||||
|
|
||||||
latency = time.perf_counter() - st
|
|
||||||
|
|
||||||
if backend_name == "runtime":
|
if backend_name == "runtime":
|
||||||
gen_out = json.loads(gen_out)
|
gen_out = json.loads(gen_out)
|
||||||
|
|
||||||
@@ -304,8 +303,8 @@ def throughput_test(
|
|||||||
warmup_requests = sample_random_requests(
|
warmup_requests = sample_random_requests(
|
||||||
input_len=256,
|
input_len=256,
|
||||||
output_len=16,
|
output_len=16,
|
||||||
num_prompts=16,
|
num_prompts=min(bench_args.num_prompts, 16),
|
||||||
range_ratio=0.8,
|
range_ratio=1.0,
|
||||||
tokenizer=tokenizer,
|
tokenizer=tokenizer,
|
||||||
dataset_path=bench_args.dataset_path,
|
dataset_path=bench_args.dataset_path,
|
||||||
)
|
)
|
||||||
@@ -321,6 +320,7 @@ def throughput_test(
|
|||||||
extra_request_body=extra_request_body,
|
extra_request_body=extra_request_body,
|
||||||
profile=False,
|
profile=False,
|
||||||
)
|
)
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
logging.info("\nBenchmark...")
|
logging.info("\nBenchmark...")
|
||||||
result = throughput_test_once(
|
result = throughput_test_once(
|
||||||
|
|||||||
Reference in New Issue
Block a user