Support outputting details for bench_serving (#6107)
This commit is contained in:
@@ -1380,21 +1380,24 @@ async def benchmark(
|
|||||||
else:
|
else:
|
||||||
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl"
|
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl"
|
||||||
|
|
||||||
|
result_details = {
|
||||||
|
"input_lens": [output.prompt_len for output in outputs],
|
||||||
|
"output_lens": output_lens,
|
||||||
|
"ttfts": [output.ttft for output in outputs],
|
||||||
|
"itls": [output.itl for output in outputs],
|
||||||
|
"generated_texts": [output.generated_text for output in outputs],
|
||||||
|
"errors": [output.error for output in outputs],
|
||||||
|
}
|
||||||
|
|
||||||
# Append results to a JSONL file
|
# Append results to a JSONL file
|
||||||
with open(output_file_name, "a") as file:
|
with open(output_file_name, "a") as file:
|
||||||
file.write(json.dumps(result) + "\n")
|
if args.output_details:
|
||||||
|
result_for_dump = result | result_details
|
||||||
|
else:
|
||||||
|
result_for_dump = result
|
||||||
|
file.write(json.dumps(result_for_dump) + "\n")
|
||||||
|
|
||||||
result.update(
|
return result | result_details
|
||||||
{
|
|
||||||
"input_lens": [output.prompt_len for output in outputs],
|
|
||||||
"output_lens": output_lens,
|
|
||||||
"ttfts": [output.ttft for output in outputs],
|
|
||||||
"itls": [output.itl for output in outputs],
|
|
||||||
"generated_texts": [output.generated_text for output in outputs],
|
|
||||||
"errors": [output.error for output in outputs],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def check_chat_template(model_path):
|
def check_chat_template(model_path):
|
||||||
@@ -1424,6 +1427,9 @@ def run_benchmark(args_: argparse.Namespace):
|
|||||||
if not hasattr(args, "warmup_requests"):
|
if not hasattr(args, "warmup_requests"):
|
||||||
args.warmup_requests = 1
|
args.warmup_requests = 1
|
||||||
|
|
||||||
|
if not hasattr(args, "output_details"):
|
||||||
|
args.output_details = False
|
||||||
|
|
||||||
print(f"benchmark_args={args}")
|
print(f"benchmark_args={args}")
|
||||||
|
|
||||||
# Set global environments
|
# Set global environments
|
||||||
@@ -1668,6 +1674,9 @@ if __name__ == "__main__":
|
|||||||
"if the server is not processing requests fast enough to keep up.",
|
"if the server is not processing requests fast enough to keep up.",
|
||||||
)
|
)
|
||||||
parser.add_argument("--output-file", type=str, help="Output JSONL file name.")
|
parser.add_argument("--output-file", type=str, help="Output JSONL file name.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--output-details", action="store_true", help="Output details of benchmarking."
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--disable-tqdm",
|
"--disable-tqdm",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
Reference in New Issue
Block a user