From 977f785dad98540f01bca34abe6c6fd326fd6a7c Mon Sep 17 00:00:00 2001 From: mlmz <54172054+minleminzui@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:02:59 +0800 Subject: [PATCH] Docs: Rewrite docs for LLama 405B and ModelSpace (#2773) Co-authored-by: Chayenne --- docs/backend/server_arguments.md | 43 -------------------------------- docs/index.rst | 2 ++ docs/references/llama_405B.md | 16 ++++++++++++ docs/references/modelscope.md | 28 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 43 deletions(-) create mode 100644 docs/references/llama_405B.md create mode 100644 docs/references/modelscope.md diff --git a/docs/backend/server_arguments.md b/docs/backend/server_arguments.md index a4913b8af..fcee7f88d 100644 --- a/docs/backend/server_arguments.md +++ b/docs/backend/server_arguments.md @@ -32,46 +32,3 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1 ``` -## Use Models From ModelScope -
-More - -To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable SGLANG_USE_MODELSCOPE. -``` -export SGLANG_USE_MODELSCOPE=true -``` -Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server -``` -SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000 -``` - -Or start it by docker. -```bash -docker run --gpus all \ - -p 30000:30000 \ - -v ~/.cache/modelscope:/root/.cache/modelscope \ - --env "SGLANG_USE_MODELSCOPE=true" \ - --ipc=host \ - lmsysorg/sglang:latest \ - python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0 --port 30000 -``` - -
- -## Example: Run Llama 3.1 405B -
-More - -```bash -# Run 405B (fp8) on a single node -python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8 - -# Run 405B (fp16) on two nodes -## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 - -## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 -``` - -
diff --git a/docs/index.rst b/docs/index.rst
index 80a53d1cb..414116189 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -60,3 +60,5 @@ The core features include:
    references/troubleshooting.md
    references/faq.md
    references/learn_more.md
+   references/llama_405B.md
+   references/modelscope.md
diff --git a/docs/references/llama_405B.md b/docs/references/llama_405B.md
new file mode 100644
index 000000000..3383d1625
--- /dev/null
+++ b/docs/references/llama_405B.md
@@ -0,0 +1,16 @@
+# Example: Run Llama 3.1 405B
+
+```bash
+# Run 405B (fp8) on a single node
+python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
+```
+
+```bash
+# Run 405B (fp16) on two nodes
+## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0
+
+## on the second node, replace the `172.16.4.52:20000` with your own first node ip address and port
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1
+```
+
diff --git a/docs/references/modelscope.md b/docs/references/modelscope.md
new file mode 100644
index 000000000..ad7b6151b
--- /dev/null
+++ b/docs/references/modelscope.md
@@ -0,0 +1,28 @@
+# Use Models From ModelScope
+
+To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable `SGLANG_USE_MODELSCOPE`.
+
+```bash
+export SGLANG_USE_MODELSCOPE=true
+```
+
+We take [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) as an example. 
Launch the Server:
+---
+
+```bash
+python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
+```
+
+Or start it with Docker:
+
+```bash
+docker run --gpus all \
+    -p 30000:30000 \
+    -v ~/.cache/modelscope:/root/.cache/modelscope \
+    --env "SGLANG_USE_MODELSCOPE=true" \
+    --ipc=host \
+    lmsysorg/sglang:latest \
+    python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0 --port 30000
+```
+
+Note that ModelScope uses a different cache directory than Hugging Face. You may need to set it manually to avoid running out of disk space.