diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index a35dfe500..6e6f2af12 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -1380,21 +1380,24 @@ async def benchmark( else: output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl" + result_details = { + "input_lens": [output.prompt_len for output in outputs], + "output_lens": output_lens, + "ttfts": [output.ttft for output in outputs], + "itls": [output.itl for output in outputs], + "generated_texts": [output.generated_text for output in outputs], + "errors": [output.error for output in outputs], + } + # Append results to a JSONL file with open(output_file_name, "a") as file: - file.write(json.dumps(result) + "\n") + if args.output_details: + result_for_dump = result | result_details + else: + result_for_dump = result + file.write(json.dumps(result_for_dump) + "\n") - result.update( - { - "input_lens": [output.prompt_len for output in outputs], - "output_lens": output_lens, - "ttfts": [output.ttft for output in outputs], - "itls": [output.itl for output in outputs], - "generated_texts": [output.generated_text for output in outputs], - "errors": [output.error for output in outputs], - } - ) - return result + return result | result_details def check_chat_template(model_path): @@ -1424,6 +1427,9 @@ def run_benchmark(args_: argparse.Namespace): if not hasattr(args, "warmup_requests"): args.warmup_requests = 1 + if not hasattr(args, "output_details"): + args.output_details = False + print(f"benchmark_args={args}") # Set global environments @@ -1668,6 +1674,9 @@ if __name__ == "__main__": "if the server is not processing requests fast enough to keep up.", ) parser.add_argument("--output-file", type=str, help="Output JSONL file name.") + parser.add_argument( + "--output-details", action="store_true", help="Output details of benchmarking." + ) parser.add_argument( "--disable-tqdm", action="store_true",