Fix formatting in long code blocks (#10528)

This commit is contained in:
Philip Kiely - Baseten
2025-09-16 12:02:05 -07:00
committed by GitHub
parent 0abb41c70d
commit 7f028b07c4
11 changed files with 345 additions and 50 deletions

View File

@@ -34,22 +34,88 @@ uv pip install mooncake-transfer-engine
### Llama Single Node
```bash
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-ib-device mlx5_roce0
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-ib-device mlx5_roce0
$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
python -m sglang.launch_server \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--disaggregation-mode prefill \
--disaggregation-ib-device mlx5_roce0
python -m sglang.launch_server \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--disaggregation-mode decode \
--port 30001 \
--base-gpu-id 1 \
--disaggregation-ib-device mlx5_roce0
python -m sglang_router.launch_router \
--pd-disaggregation \
--prefill http://127.0.0.1:30000 \
--decode http://127.0.0.1:30001 \
--host 0.0.0.0 \
--port 8000
```
### DeepSeek Multi-Node
```bash
# prefill 0
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-ib-device ${device_name} \
--disaggregation-mode prefill \
--host ${local_ip} \
--port 30000 \
--trust-remote-code \
--dist-init-addr ${prefill_master_ip}:5000 \
--nnodes 2 \
--node-rank 0 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8
# prefill 1
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-ib-device ${device_name} \
--disaggregation-mode prefill \
--host ${local_ip} \
--port 30000 \
--trust-remote-code \
--dist-init-addr ${prefill_master_ip}:5000 \
--nnodes 2 \
--node-rank 1 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8
# decode 0
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-ib-device ${device_name} \
--disaggregation-mode decode \
--host ${local_ip} \
--port 30001 \
--trust-remote-code \
--dist-init-addr ${decode_master_ip}:5000 \
--nnodes 2 \
--node-rank 0 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8 \
--max-running-requests 128
# decode 1
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-ib-device ${device_name} \
--disaggregation-mode decode \
--host ${local_ip} \
--port 30001 \
--trust-remote-code \
--dist-init-addr ${decode_master_ip}:5000 \
--nnodes 2 \
--node-rank 1 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8 \
--max-running-requests 128
```
### Advanced Configuration
@@ -98,22 +164,88 @@ pip install . --config-settings=setup-args="-Ducx_path=/path/to/ucx"
### Llama Single Node
```bash
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-transfer-backend nixl
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-transfer-backend nixl
$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
python -m sglang.launch_server \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--disaggregation-mode prefill \
--disaggregation-transfer-backend nixl
python -m sglang.launch_server \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--disaggregation-mode decode \
--port 30001 \
--base-gpu-id 1 \
--disaggregation-transfer-backend nixl
python -m sglang_router.launch_router \
--pd-disaggregation \
--prefill http://127.0.0.1:30000 \
--decode http://127.0.0.1:30001 \
--host 0.0.0.0 \
--port 8000
```
### DeepSeek Multi-Node
```bash
# prefill 0
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-transfer-backend nixl \
--disaggregation-mode prefill \
--host ${local_ip} \
--port 30000 \
--trust-remote-code \
--dist-init-addr ${prefill_master_ip}:5000 \
--nnodes 2 \
--node-rank 0 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8
# prefill 1
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-transfer-backend nixl \
--disaggregation-mode prefill \
--host ${local_ip} \
--port 30000 \
--trust-remote-code \
--dist-init-addr ${prefill_master_ip}:5000 \
--nnodes 2 \
--node-rank 1 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8
# decode 0
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-transfer-backend nixl \
--disaggregation-mode decode \
--host ${local_ip} \
--port 30001 \
--trust-remote-code \
--dist-init-addr ${decode_master_ip}:5000 \
--nnodes 2 \
--node-rank 0 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8 \
--max-running-requests 128
# decode 1
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-transfer-backend nixl \
--disaggregation-mode decode \
--host ${local_ip} \
--port 30001 \
--trust-remote-code \
--dist-init-addr ${decode_master_ip}:5000 \
--nnodes 2 \
--node-rank 1 \
--tp-size 16 \
--dp-size 8 \
--enable-dp-attention \
--moe-a2a-backend deepep \
--mem-fraction-static 0.8 \
--max-running-requests 128
```
## ASCEND
@@ -135,16 +267,44 @@ export ENABLE_ASCEND_TRANSFER_WITH_MOONCAKE=true
### Llama Single Node
```bash
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-transfer-backend ascend
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-transfer-backend ascend
$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
python -m sglang.launch_server \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--disaggregation-mode prefill \
--disaggregation-transfer-backend ascend
python -m sglang.launch_server \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--disaggregation-mode decode \
--port 30001 \
--base-gpu-id 1 \
--disaggregation-transfer-backend ascend
python -m sglang_router.launch_router \
--pd-disaggregation \
--prefill http://127.0.0.1:30000 \
--decode http://127.0.0.1:30001 \
--host 0.0.0.0 \
--port 8000
```
### DeepSeek Multi-Node
```bash
# prefill 0
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend ascend --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 1 --node-rank 0 --tp-size 16
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-transfer-backend ascend \
--disaggregation-mode prefill \
--host ${local_ip} \
--port 30000 \
--trust-remote-code \
--dist-init-addr ${prefill_master_ip}:5000 \
--nnodes 1 \
--node-rank 0 \
--tp-size 16
# decode 0
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend ascend --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 1 --node-rank 0 --tp-size 16
python -m sglang.launch_server \
--model-path deepseek-ai/DeepSeek-V3-0324 \
--disaggregation-transfer-backend ascend \
--disaggregation-mode decode \
--host ${local_ip} \
--port 30001 \
--trust-remote-code \
--dist-init-addr ${decode_master_ip}:5000 \
--nnodes 1 \
--node-rank 0 \
--tp-size 16
```