[v0.11.0][P/D]Set adxl as default backend and update readme (#3771)
### What this PR does / why we need it?

Set the adxl engine as the default Mooncake backend, because Ascend Transport is no longer maintained. Update the README to include instructions for building and installing Mooncake with the adxl backend.

### Does this PR introduce _any_ user-facing change?

Users need to compile and install Mooncake with the adxl backend according to the revised README instructions.

### How was this patch tested?

By CI.

---------

Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com>
This commit is contained in:
@@ -57,7 +57,7 @@ for i in {0..15}; do hccn_tool -i $i -ping -g address x.x.x.x;done
|
|||||||
Mooncake is the serving platform for Kimi, a leading LLM service provided by Moonshot AI. First, we need to obtain the Mooncake project. Refer to the following command:
|
Mooncake is the serving platform for Kimi, a leading LLM service provided by Moonshot AI. First, we need to obtain the Mooncake project. Refer to the following command:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
git clone -b pooling_async_memecpy_v1 https://github.com/AscendTransport/Mooncake
|
git clone https://github.com/kvcache-ai/Mooncake.git
|
||||||
```
|
```
|
||||||
|
|
||||||
Update and install Python.
|
Update and install Python.
|
||||||
@@ -67,22 +67,25 @@ apt-get update
|
|||||||
apt-get install python3
|
apt-get install python3
|
||||||
```
|
```
|
||||||
|
|
||||||
Install the relevant dependencies. The installation of Go is not required.
|
Modify the Mooncake compilation option:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
cd Mooncake
|
cd Mooncake
|
||||||
bash dependencies.sh -y
|
vi mooncake-common/common.cmake
|
||||||
|
# Find this line and set USE_ASCEND_DIRECT to ON.
|
||||||
|
option(USE_ASCEND_DIRECT "option for using ascend npu with adxl engine" ON)
|
||||||
```
|
```
|
||||||
|
|
||||||
Install mpi
|
Install mpi
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
apt purge mpich libmpich-dev -y
|
apt-get install mpich libmpich-dev -y
|
||||||
apt purge openmpi-bin -y
|
```
|
||||||
apt purge openmpi-bin libopenmpi-dev -y
|
|
||||||
apt install mpich libmpich-dev -y
|
Install the relevant dependencies. The installation of Go is not required.
|
||||||
export CPATH=/usr/lib/aarch64-linux-gnu/mpich/include/:$CPATH
|
|
||||||
export CPATH=/usr/lib/aarch64-linux-gnu/openmpi/lib:$CPATH
|
```shell
|
||||||
|
bash dependencies.sh -y
|
||||||
```
|
```
|
||||||
|
|
||||||
Compile and install
|
Compile and install
|
||||||
@@ -93,8 +96,6 @@ cd build
|
|||||||
cmake ..
|
cmake ..
|
||||||
make -j
|
make -j
|
||||||
make install
|
make install
|
||||||
cp mooncake-transfer-engine/src/transport/ascend_transport/hccl_transport/ascend_transport_c/libascend_transport_mem.so /usr/local/Ascend/ascend-toolkit/latest/python/site-packages/
|
|
||||||
cp mooncake-transfer-engine/src/libtransfer_engine.so /usr/local/Ascend/ascend-toolkit/latest/python/site-packages/
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Prefiller/Decoder Deployment
|
## Prefiller/Decoder Deployment
|
||||||
@@ -119,10 +120,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=1024
|
export HCCL_BUFFSIZE=1024
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1 # enable aggregated transmission
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0 # print ascend transport logs
|
|
||||||
export ACL_OP_INIT_MODE=1 # acl op initialization mode to prevent device id acquisition failure
|
|
||||||
export ASCEND_A3_ENABLE=1 # enable hccs transmission for A3; set to 0 for A2
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -178,10 +175,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=1024
|
export HCCL_BUFFSIZE=1024
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -237,10 +230,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=2048
|
export HCCL_BUFFSIZE=2048
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -298,10 +287,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=2048
|
export HCCL_BUFFSIZE=2048
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -366,10 +351,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=1024
|
export HCCL_BUFFSIZE=1024
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -425,10 +406,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=1024
|
export HCCL_BUFFSIZE=1024
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -484,10 +461,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=2048
|
export HCCL_BUFFSIZE=2048
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
@@ -545,10 +518,6 @@ export VLLM_USE_V1=1
|
|||||||
export HCCL_BUFFSIZE=2048
|
export HCCL_BUFFSIZE=2048
|
||||||
export OMP_PROC_BIND=false
|
export OMP_PROC_BIND=false
|
||||||
export OMP_NUM_THREADS=10
|
export OMP_NUM_THREADS=10
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
|
||||||
export ASCEND_TRANSPORT_PRINT=0
|
|
||||||
export ACL_OP_INIT_MODE=1
|
|
||||||
export ASCEND_A3_ENABLE=1
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
vllm serve /model/Qwen3-235B-A22B-W8A8 \
|
||||||
|
|||||||
@@ -899,7 +899,7 @@ class MooncakeConnectorWorker:
|
|||||||
self.device_id = device_ids[self.tp_rank] # type: ignore
|
self.device_id = device_ids[self.tp_rank] # type: ignore
|
||||||
|
|
||||||
if vllm_config.kv_transfer_config.get_from_extra_config(
|
if vllm_config.kv_transfer_config.get_from_extra_config(
|
||||||
'use_ascend_direct', False):
|
'use_ascend_direct', True):
|
||||||
hostname = self.side_channel_host
|
hostname = self.side_channel_host
|
||||||
else:
|
else:
|
||||||
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"
|
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"
|
||||||
|
|||||||
@@ -656,7 +656,7 @@ class MooncakeLayerwiseConnectorWorker:
|
|||||||
self.device_id = device_ids[self.tp_rank] # type: ignore
|
self.device_id = device_ids[self.tp_rank] # type: ignore
|
||||||
|
|
||||||
if vllm_config.kv_transfer_config.get_from_extra_config(
|
if vllm_config.kv_transfer_config.get_from_extra_config(
|
||||||
'use_ascend_direct', False):
|
'use_ascend_direct', True):
|
||||||
hostname = self.side_channel_host
|
hostname = self.side_channel_host
|
||||||
else:
|
else:
|
||||||
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"
|
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"
|
||||||
|
|||||||
Reference in New Issue
Block a user