[Doc] Modify DeepSeek-R1/V3.1 documentation (#5426)

### What this PR does / why we need it: This PR updates the DeepSeek-R1/V3.1 documentation, mainly the MTP speculative-token count (`num_speculative_tokens`) and a few other configuration values. Signed-off-by: GDzhu01 <809721801@qq.com>
2025-12-27 17:13:58 +08:00
parent 09f71c14a6
commit 04104031d0
2 changed files with 15 additions and 8 deletions
--- a/docs/source/tutorials/DeepSeek-V3.1.md
+++ b/docs/source/tutorials/DeepSeek-V3.1.md
@@ -166,8 +166,7 @@ export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
 export OMP_PROC_BIND=false
-export OMP_NUM_THREADS=10
-export VLLM_USE_V1=1
+export OMP_NUM_THREADS=1
 export HCCL_BUFFSIZE=200
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_ASCEND_ENABLE_MLAPO=1
@@ -220,7 +219,7 @@ export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
 export OMP_PROC_BIND=false
-export OMP_NUM_THREADS=10
+export OMP_NUM_THREADS=1
 export HCCL_BUFFSIZE=200
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_ASCEND_ENABLE_MLAPO=1
@@ -249,7 +248,7 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
 --trust-remote-code \
 --no-enable-prefix-caching \
 --gpu-memory-utilization 0.92 \
---speculative-config '{"num_speculative_tokens": 1, "method": "mtp"}' \
+--speculative-config '{"num_speculative_tokens": 3, "method": "mtp"}' \
 --compilation-config '{"cudagraph_capture_sizes":[4,16,32,48,64], "cudagraph_mode": "FULL_DECODE_ONLY"}'
 ```