Fix formatting in long code blocks (#10528)
This commit is contained in:
committed by GitHub
parent 0abb41c70d
commit 7f028b07c4
@@ -34,22 +34,88 @@ uv pip install mooncake-transfer-engine
|
||||
### Llama Single Node
|
||||
|
||||
```bash
|
||||
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-ib-device mlx5_roce0
|
||||
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-ib-device mlx5_roce0
|
||||
$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Llama-3.1-8B-Instruct \
|
||||
--disaggregation-mode prefill \
|
||||
--disaggregation-ib-device mlx5_roce0
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Llama-3.1-8B-Instruct \
|
||||
--disaggregation-mode decode \
|
||||
--port 30001 \
|
||||
--base-gpu-id 1 \
|
||||
--disaggregation-ib-device mlx5_roce0
|
||||
python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
### DeepSeek Multi-Node
|
||||
|
||||
```bash
|
||||
# prefill 0
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-ib-device ${device_name} \
|
||||
--disaggregation-mode prefill \
|
||||
--host ${local_ip} \
|
||||
--port 30000 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${prefill_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 0 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8
|
||||
# prefill 1
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-ib-device ${device_name} \
|
||||
--disaggregation-mode prefill \
|
||||
--host ${local_ip} \
|
||||
--port 30000 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${prefill_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 1 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8
|
||||
# decode 0
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-ib-device ${device_name} \
|
||||
--disaggregation-mode decode \
|
||||
--host ${local_ip} \
|
||||
--port 30001 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${decode_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 0 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8 \
|
||||
--max-running-requests 128
|
||||
# decode 1
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-ib-device ${device_name} --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-ib-device ${device_name} \
|
||||
--disaggregation-mode decode \
|
||||
--host ${local_ip} \
|
||||
--port 30001 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${decode_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 1 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8 \
|
||||
--max-running-requests 128
|
||||
```
|
||||
### Advanced Configuration
|
||||
|
||||
@@ -98,22 +164,88 @@ pip install . --config-settings=setup-args="-Ducx_path=/path/to/ucx"
|
||||
### Llama Single Node
|
||||
|
||||
```bash
|
||||
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-transfer-backend nixl
|
||||
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-transfer-backend nixl
|
||||
$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Llama-3.1-8B-Instruct \
|
||||
--disaggregation-mode prefill \
|
||||
--disaggregation-transfer-backend nixl
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Llama-3.1-8B-Instruct \
|
||||
--disaggregation-mode decode \
|
||||
--port 30001 \
|
||||
--base-gpu-id 1 \
|
||||
--disaggregation-transfer-backend nixl
|
||||
python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
### DeepSeek Multi-Node
|
||||
|
||||
```bash
|
||||
# prefill 0
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-transfer-backend nixl \
|
||||
--disaggregation-mode prefill \
|
||||
--host ${local_ip} \
|
||||
--port 30000 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${prefill_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 0 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8
|
||||
# prefill 1
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-transfer-backend nixl \
|
||||
--disaggregation-mode prefill \
|
||||
--host ${local_ip} \
|
||||
--port 30000 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${prefill_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 1 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8
|
||||
# decode 0
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 0 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-transfer-backend nixl \
|
||||
--disaggregation-mode decode \
|
||||
--host ${local_ip} \
|
||||
--port 30001 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${decode_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 0 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8 \
|
||||
--max-running-requests 128
|
||||
# decode 1
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend nixl --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 2 --node-rank 1 --tp-size 16 --dp-size 8 --enable-dp-attention --moe-a2a-backend deepep --mem-fraction-static 0.8 --max-running-requests 128
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-transfer-backend nixl \
|
||||
--disaggregation-mode decode \
|
||||
--host ${local_ip} \
|
||||
--port 30001 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${decode_master_ip}:5000 \
|
||||
--nnodes 2 \
|
||||
--node-rank 1 \
|
||||
--tp-size 16 \
|
||||
--dp-size 8 \
|
||||
--enable-dp-attention \
|
||||
--moe-a2a-backend deepep \
|
||||
--mem-fraction-static 0.8 \
|
||||
--max-running-requests 128
|
||||
```
|
||||
|
||||
## ASCEND
|
||||
@@ -135,16 +267,44 @@ export ENABLE_ASCEND_TRANSFER_WITH_MOONCAKE=true
|
||||
### Llama Single Node
|
||||
|
||||
```bash
|
||||
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-transfer-backend ascend
|
||||
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-transfer-backend ascend
|
||||
$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Llama-3.1-8B-Instruct \
|
||||
--disaggregation-mode prefill \
|
||||
--disaggregation-transfer-backend ascend
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Llama-3.1-8B-Instruct \
|
||||
--disaggregation-mode decode \
|
||||
--port 30001 \
|
||||
--base-gpu-id 1 \
|
||||
--disaggregation-transfer-backend ascend
|
||||
python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
### DeepSeek Multi-Node
|
||||
|
||||
```bash
|
||||
# prefill 0
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend ascend --disaggregation-mode prefill --host ${local_ip} --port 30000 --trust-remote-code --dist-init-addr ${prefill_master_ip}:5000 --nnodes 1 --node-rank 0 --tp-size 16
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-transfer-backend ascend \
|
||||
--disaggregation-mode prefill \
|
||||
--host ${local_ip} \
|
||||
--port 30000 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${prefill_master_ip}:5000 \
|
||||
--nnodes 1 \
|
||||
--node-rank 0 \
|
||||
--tp-size 16
|
||||
# decode 0
|
||||
$ python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --disaggregation-transfer-backend ascend --disaggregation-mode decode --host ${local_ip} --port 30001 --trust-remote-code --dist-init-addr ${decode_master_ip}:5000 --nnodes 1 --node-rank 0 --tp-size 16
|
||||
python -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-V3-0324 \
|
||||
--disaggregation-transfer-backend ascend \
|
||||
--disaggregation-mode decode \
|
||||
--host ${local_ip} \
|
||||
--port 30001 \
|
||||
--trust-remote-code \
|
||||
--dist-init-addr ${decode_master_ip}:5000 \
|
||||
--nnodes 1 \
|
||||
--node-rank 0 \
|
||||
--tp-size 16
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user