Updated Instructions on Profiling SGLang Infer System with AMD GPUs (#1966)

Co-authored-by: wunhuang <wunhuang@amd.com>
2024-11-08 23:19:03 -08:00
parent e3126e3c5f
commit d1150e9a00
10 changed files with 729 additions and 1 deletions
--- a/3rdparty/amd/profiling/server.sh
+++ b/3rdparty/amd/profiling/server.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Launches sglang.launch_server under loadTracer.sh and tees its console output to a timestamped log file.
+# Alternative output directory, kept for reference: export SGLANG_TORCH_PROFILER_DIR=/data/sglang/
+export SGLANG_TORCH_PROFILER_DIR=/sgl-workspace/sglang/profile/
+
+# Timestamp (YYYYmmdd_HHMMSS) embedded in the log file name so runs started at different times do not overwrite each other
+TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
+
+# Log file is a relative path (created in the current working directory). NOTE(review): despite the .json suffix it receives the raw console output piped through tee below
+LOGFILE="sglang_server_log_$TIMESTAMP.json"
+
+# Start the server through loadTracer.sh (invoked by bare name, so it must be on PATH; presumably the tracing wrapper - confirm); stderr is merged into stdout and tee echoes it while writing $LOGFILE
+loadTracer.sh python3 -m sglang.launch_server \
+    --model-path /sgl-workspace/sglang/dummy_grok1 \
+    --tokenizer-path Xenova/grok-1-tokenizer \
+    --load-format dummy \
+    --quant fp8 \
+    --tp 8 \
+    --port 30000 \
+    --disable-radix-cache 2>&1 | tee "$LOGFILE"