From 8a2681e26a1a7993f12a46fb19f7ee238117df83 Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Sat, 28 Dec 2024 13:39:56 +0800 Subject: [PATCH] Update readme (#2625) --- benchmark/deepseek_v3/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md index f019a8f9f..98328b592 100644 --- a/benchmark/deepseek_v3/README.md +++ b/benchmark/deepseek_v3/README.md @@ -18,8 +18,9 @@ If you see errors when launching the server, please check if it has finished dow ### Using Docker (Recommended) ```bash docker run --gpus all --shm-size 32g -p 30000:30000 -v ~/.cache/huggingface:/root/.cache/huggingface --ipc=host lmsysorg/sglang:latest \ - python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --enable-dp-attention --tp 8 --trust-remote-code --port 30000 + python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code --port 30000 ``` +For high-QPS scenarios, you can add the `--enable-dp-attention` argument to improve throughput. ### Using pip ```bash @@ -27,7 +28,7 @@ docker run --gpus all --shm-size 32g -p 30000:30000 -v ~/.cache/huggingface:/roo pip install "sglang[all]==0.4.1.post1" --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer # Launch -python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --enable-dp-attention --tp 8 --trust-remote-code +python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code ``` ### Example with OpenAI API