diff --git a/docs/source/tutorials/DeepSeek-R1.md b/docs/source/tutorials/DeepSeek-R1.md
index cda22a70..ecc42c35 100644
--- a/docs/source/tutorials/DeepSeek-R1.md
+++ b/docs/source/tutorials/DeepSeek-R1.md
@@ -88,12 +88,14 @@ local_ip="xxxx"
 
 # AIV
 export HCCL_OP_EXPANSION_MODE="AIV"
+
 export HCCL_IF_IP=$local_ip
 export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
 export VLLM_ASCEND_ENABLE_MLAPO=1
 export VLLM_ASCEND_BALANCE_SCHEDULING=1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_USE_MODELSCOPE=True
 
 vllm serve vllm-ascend/DeepSeek-R1-W8A8 \
@@ -143,6 +145,9 @@ export HCCL_IF_IP=$local_ip
 export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
+export OMP_PROC_BIND=false
+export OMP_NUM_THREADS=1
+export HCCL_BUFFSIZE=200
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_ASCEND_ENABLE_MLAPO=1
 export VLLM_ASCEND_BALANCE_SCHEDULING=1
@@ -167,8 +172,8 @@ vllm serve vllm-ascend/DeepSeek-R1-W8A8 \
   --max-model-len 16384 \
   --max-num-batched-tokens 4096 \
   --trust-remote-code \
-  --gpu-memory-utilization 0.94 \
-  --speculative-config '{"num_speculative_tokens":1,"method":"mtp"}' \
+  --gpu-memory-utilization 0.92 \
+  --speculative-config '{"num_speculative_tokens":3,"method":"mtp"}' \
   --compilation-config '{"cudagraph_capture_sizes":[4,16,32,48,64], "cudagraph_mode": "FULL_DECODE_ONLY"}'
 ```
 
@@ -187,6 +192,9 @@ export HCCL_IF_IP=$local_ip
 export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
+export OMP_PROC_BIND=false
+export OMP_NUM_THREADS=1
+export HCCL_BUFFSIZE=200
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_ASCEND_ENABLE_MLAPO=1
 export VLLM_ASCEND_BALANCE_SCHEDULING=1
@@ -213,8 +221,8 @@ vllm serve vllm-ascend/DeepSeek-R1-W8A8 \
   --max-model-len 16384 \
   --max-num-batched-tokens 4096 \
   --trust-remote-code \
-  --gpu-memory-utilization 0.94 \
-  --speculative-config '{"num_speculative_tokens":1,"method":"mtp"}' \
+  --gpu-memory-utilization 0.92 \
+  --speculative-config '{"num_speculative_tokens":3,"method":"mtp"}' \
   --compilation-config '{"cudagraph_capture_sizes":[4,16,32,48,64], "cudagraph_mode": "FULL_DECODE_ONLY"}'
 ```
 
diff --git a/docs/source/tutorials/DeepSeek-V3.1.md b/docs/source/tutorials/DeepSeek-V3.1.md
index a65a0d99..6e1c3490 100644
--- a/docs/source/tutorials/DeepSeek-V3.1.md
+++ b/docs/source/tutorials/DeepSeek-V3.1.md
@@ -166,8 +166,7 @@ export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
 export OMP_PROC_BIND=false
-export OMP_NUM_THREADS=10
-export VLLM_USE_V1=1
+export OMP_NUM_THREADS=1
 export HCCL_BUFFSIZE=200
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_ASCEND_ENABLE_MLAPO=1
@@ -220,7 +219,7 @@ export GLOO_SOCKET_IFNAME=$nic_name
 export TP_SOCKET_IFNAME=$nic_name
 export HCCL_SOCKET_IFNAME=$nic_name
 export OMP_PROC_BIND=false
-export OMP_NUM_THREADS=10
+export OMP_NUM_THREADS=1
 export HCCL_BUFFSIZE=200
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
 export VLLM_ASCEND_ENABLE_MLAPO=1
@@ -249,7 +248,7 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
 --trust-remote-code \
 --no-enable-prefix-caching \
 --gpu-memory-utilization 0.92 \
---speculative-config '{"num_speculative_tokens": 1, "method": "mtp"}' \
+--speculative-config '{"num_speculative_tokens": 3, "method": "mtp"}' \
 --compilation-config '{"cudagraph_capture_sizes":[4,16,32,48,64], "cudagraph_mode": "FULL_DECODE_ONLY"}'
 ```