From 2d3ae4e1258791a04a28279044359c08c16af99e Mon Sep 17 00:00:00 2001
From: Yineng Zhang <me@zhyncs.com>
Date: Thu, 25 Jul 2024 00:03:17 +1000
Subject: [PATCH] docs: update doc (#713)

---
 benchmark/blog_v0_2/README.md | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/benchmark/blog_v0_2/README.md b/benchmark/blog_v0_2/README.md
index c2f1b7822..a718bbd4a 100644
--- a/benchmark/blog_v0_2/README.md
+++ b/benchmark/blog_v0_2/README.md
@@ -29,6 +29,9 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruc
 
 # Meta-Llama-3.1-70B-Instruct
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-70B-Instruct --disable-radix-cache --tp 8
+
+# Meta-Llama-3-70B-Instruct-FP8
+python -m sglang.launch_server --model-path neuralmagic/Meta-Llama-3-70B-Instruct-FP8 --disable-radix-cache --tp 8
 ```
 
 ## Benchmark
@@ -59,19 +62,19 @@ cat sglang_offline_benchmark.jsonl | cut -d':' -f12 | cut -d',' -f1
 #### Online benchmark
 
 ```bash
-# Random dataset, Input [1024, 4096], Output [256, 1024], request rate 1, num prompts 300
+# Random dataset, Input [512, 4096], Output [128, 1024], request rate 1, num prompts 300
 python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --num-prompts 300 --request-rate 1 --output-file sglang_online_benchmark.jsonl
 
-# Random dataset, Input [1024, 4096], Output [256, 1024], request rate 2, num prompts 600
+# Random dataset, Input [512, 4096], Output [128, 1024], request rate 2, num prompts 600
 python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --num-prompts 600 --request-rate 2 --output-file sglang_online_benchmark.jsonl
 
-# Random dataset, Input [1024, 4096], Output [256, 1024], request rate 4, num prompts 1200
+# Random dataset, Input [512, 4096], Output [128, 1024], request rate 4, num prompts 1200
 python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --num-prompts 1200 --request-rate 4 --output-file sglang_online_benchmark.jsonl
 
-# Random dataset, Input [1024, 4096], Output [256, 1024], request rate 8, num prompts 2400
+# Random dataset, Input [512, 4096], Output [128, 1024], request rate 8, num prompts 2400
 python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --num-prompts 2400 --request-rate 8 --output-file sglang_online_benchmark.jsonl
 
-# Random dataset, Input [1024, 4096], Output [256, 1024], request rate 16, num prompts 3200
+# Random dataset, Input [512, 4096], Output [128, 1024], request rate 16, num prompts 3200
 python3 -m sglang.bench_serving --backend sglang --dataset-name random --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --num-prompts 3200 --request-rate 16 --output-file sglang_online_benchmark.jsonl
 
 # get median e2e latency