diff --git a/docs/backend/server_arguments.md b/docs/backend/server_arguments.md index fcee7f88d..90b36a0bd 100644 --- a/docs/backend/server_arguments.md +++ b/docs/backend/server_arguments.md @@ -31,4 +31,3 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct # Node 1 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1 ``` - diff --git a/docs/index.rst b/docs/index.rst index 414116189..ff104808c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -56,9 +56,9 @@ The core features include: references/hyperparameter_tuning.md references/benchmark_and_profiling.md references/custom_chat_template.md + references/llama_405B.md + references/modelscope.md references/contribution_guide.md references/troubleshooting.md references/faq.md references/learn_more.md - references/llama_405B.md - references/modelscope.md diff --git a/docs/references/llama_405B.md b/docs/references/llama_405B.md index 3383d1625..4f70e89f6 100644 --- a/docs/references/llama_405B.md +++ b/docs/references/llama_405B.md @@ -1,16 +1,19 @@ -# Example: Run Llama 3.1 405B +# Run Llama 3.1 405B + +## Run 405B (fp8) on a Single Node ```bash -# Run 405B (fp8) on a single node python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8 ``` +## Run 405B (fp16) on Two Nodes + ```bash -# Run 405B (fp16) on two nodes -## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port +# on the first node, replace 172.16.4.52:20000 with your own node ip address and port + python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 -## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 
172.16.4.52:20000 --nnodes 2 --node-rank 1 -``` +# on the second node, use the same address as on the first node: replace 172.16.4.52:20000 with the first node's ip address and port +python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 +```