From 977f785dad98540f01bca34abe6c6fd326fd6a7c Mon Sep 17 00:00:00 2001 From: mlmz <54172054+minleminzui@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:02:59 +0800 Subject: [PATCH] Docs: Rewrite docs for LLama 405B and ModelSpace (#2773) Co-authored-by: Chayenne --- docs/backend/server_arguments.md | 43 -------------------------------- docs/index.rst | 2 ++ docs/references/llama_405B.md | 16 ++++++++++++ docs/references/modelscope.md | 28 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 43 deletions(-) create mode 100644 docs/references/llama_405B.md create mode 100644 docs/references/modelscope.md diff --git a/docs/backend/server_arguments.md b/docs/backend/server_arguments.md index a4913b8af..fcee7f88d 100644 --- a/docs/backend/server_arguments.md +++ b/docs/backend/server_arguments.md @@ -32,46 +32,3 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1 ``` -## Use Models From ModelScope -
-More - -To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable SGLANG_USE_MODELSCOPE. -``` -export SGLANG_USE_MODELSCOPE=true -``` -Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server -``` -SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000 -``` - -Or start it by docker. -```bash -docker run --gpus all \ - -p 30000:30000 \ - -v ~/.cache/modelscope:/root/.cache/modelscope \ - --env "SGLANG_USE_MODELSCOPE=true" \ - --ipc=host \ - lmsysorg/sglang:latest \ - python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0 --port 30000 -``` - -
- -## Example: Run Llama 3.1 405B -
-More - -```bash -# Run 405B (fp8) on a single node -python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8 - -# Run 405B (fp16) on two nodes -## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 - -## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 -``` - -
diff --git a/docs/index.rst b/docs/index.rst
index 80a53d1cb..414116189 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -60,3 +60,5 @@ The core features include:
    references/troubleshooting.md
    references/faq.md
    references/learn_more.md
+   references/llama_405B.md
+   references/modelscope.md
diff --git a/docs/references/llama_405B.md b/docs/references/llama_405B.md
new file mode 100644
index 000000000..3383d1625
--- /dev/null
+++ b/docs/references/llama_405B.md
@@ -0,0 +1,16 @@
+# Example: Run Llama 3.1 405B
+
+```bash
+# Run 405B (fp8) on a single node
+python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
+```
+
+```bash
+# Run 405B (fp16) on two nodes
+## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0
+
+## on the second node, replace the `172.16.4.52:20000` with your own first node ip address and port
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1
+```
+
diff --git a/docs/references/modelscope.md b/docs/references/modelscope.md
new file mode 100644
index 000000000..ad7b6151b
--- /dev/null
+++ b/docs/references/modelscope.md
@@ -0,0 +1,28 @@
+# Use Models From ModelScope
+
+To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable `SGLANG_USE_MODELSCOPE`.
+
+```bash
+export SGLANG_USE_MODELSCOPE=true
+```
+
+We take [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) as an example. 
Launch the Server:
+---
+
+```bash
+python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
+```
+
+Or start it with Docker:
+
+```bash
+docker run --gpus all \
+    -p 30000:30000 \
+    -v ~/.cache/modelscope:/root/.cache/modelscope \
+    --env "SGLANG_USE_MODELSCOPE=true" \
+    --ipc=host \
+    lmsysorg/sglang:latest \
+    python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0 --port 30000
+```
+
+Note that ModelScope uses a different cache directory than Hugging Face. You may need to set it manually to avoid running out of disk space.