From b54b5a96e4e94e2bd5b7e18bb006a96fe85bfef5 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Sun, 20 Apr 2025 14:05:36 -0700 Subject: [PATCH] [Doc] Add instruction for profiling with bench_one_batch (#5581) --- docs/references/benchmark_and_profiling.md | 7 ++++++- python/sglang/bench_one_batch.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/references/benchmark_and_profiling.md b/docs/references/benchmark_and_profiling.md index 67daa8db5..98c67fd4d 100644 --- a/docs/references/benchmark_and_profiling.md +++ b/docs/references/benchmark_and_profiling.md @@ -41,9 +41,14 @@ Please make sure that the `SGLANG_TORCH_PROFILER_DIR` should be set at both server and client side, otherwise the trace file cannot be generated correctly . A secure way will be setting `SGLANG_TORCH_PROFILER_DIR` in the `.*rc` file of shell (e.g. `~/.bashrc` for bash shells). - To profile offline - ```bash export SGLANG_TORCH_PROFILER_DIR=/root/sglang/profile_log + + # profile one batch with bench_one_batch.py + # batch size can be controlled with --batch argument + python3 -m sglang.bench_one_batch --model-path meta-llama/Llama-3.1-8B-Instruct --batch 32 --input-len 1024 --output-len 10 --profile + + # profile multiple batches with bench_offline_throughput.py python -m sglang.bench_offline_throughput --model-path meta-llama/Llama-3.1-8B-Instruct --dataset-name random --num-prompts 10 --profile --mem-frac=0.8 ``` diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py index acf1520ed..0c492626e 100644 --- a/python/sglang/bench_one_batch.py +++ b/python/sglang/bench_one_batch.py @@ -396,7 +396,7 @@ def latency_test_run_once( decode_latencies.append(latency) if i < 5: rank_print( - f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s" + f"Decode. Batch size: {batch_size}, latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s" ) if profile: