diff --git a/docs/source/tutorials/models/DeepSeek-V3.2.md b/docs/source/tutorials/models/DeepSeek-V3.2.md index dd20aa84..92aaedec 100644 --- a/docs/source/tutorials/models/DeepSeek-V3.2.md +++ b/docs/source/tutorials/models/DeepSeek-V3.2.md @@ -297,7 +297,7 @@ export VLLM_USE_V1=1 export HCCL_BUFFSIZE=200 export VLLM_ASCEND_ENABLE_MLAPO=1 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True -export VLLM_ASCEND_ENABLE_FLASHCOMM1=0 +export VLLM_ASCEND_ENABLE_FLASHCOMM1=1 export HCCL_CONNECT_TIMEOUT=120 export HCCL_INTRA_PCIE_ENABLE=1 export HCCL_INTRA_ROCE_ENABLE=0 @@ -350,7 +350,7 @@ export VLLM_USE_V1=1 export HCCL_BUFFSIZE=200 export VLLM_ASCEND_ENABLE_MLAPO=1 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True -export VLLM_ASCEND_ENABLE_FLASHCOMM1=0 +export VLLM_ASCEND_ENABLE_FLASHCOMM1=1 export HCCL_CONNECT_TIMEOUT=120 export HCCL_INTRA_PCIE_ENABLE=1 export HCCL_INTRA_ROCE_ENABLE=0 @@ -830,6 +830,37 @@ python launch_online_dp.py --dp-size 8 --tp-size 4 --dp-size-local 4 --dp-rank-s python launch_online_dp.py --dp-size 8 --tp-size 4 --dp-size-local 4 --dp-rank-start 4 --dp-address 141.61.39.117 --dp-rpc-port 12777 --vllm-start-port 9100 ``` +### Request Forwarding + +To set up request forwarding, run the following script on any machine: :download:`load_balance_proxy_server_example.py` + +```shell +unset http_proxy +unset https_proxy + +python load_balance_proxy_server_example.py \ + --port 8000 \ + --host 0.0.0.0 \ + --prefiller-hosts \ + 141.61.39.105 \ + 141.61.39.113 \ + --prefiller-ports \ + 9100 \ + 9100 \ + --decoder-hosts \ + 141.61.39.117 \ + 141.61.39.117 \ + 141.61.39.117 \ + 141.61.39.117 \ + 141.61.39.181 \ + 141.61.39.181 \ + 141.61.39.181 \ + 141.61.39.181 \ + --decoder-ports \ + 9100 9101 9102 9103 \ + 9100 9101 9102 9103 +``` + ## Functional Verification Once your server is started, you can query the model with input prompts: