[Doc] add tls check to pd disaggregation readme (#5638)
### What this PR does / why we need it?
Update the PD disaggregation multi-node README: update the environment check
commands for A3 and add a TLS check.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main:
8be6432bda
Signed-off-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
@@ -44,7 +44,7 @@ such as IP addresses according to your actual environment.
|
|||||||
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
|
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Check NPU Network Configuration:
|
2. Check NPU HCCN Configuration:
|
||||||
|
|
||||||
Ensure that the hccn.conf file exists in the environment. If using Docker,
|
Ensure that the hccn.conf file exists in the environment. If using Docker,
|
||||||
mount it into the container.
|
mount it into the container.
|
||||||
@@ -67,6 +67,13 @@ such as IP addresses according to your actual environment.
|
|||||||
for i in {0..7}; do hccn_tool -i $i -ping -g address x.x.x.x; done
|
for i in {0..7}; do hccn_tool -i $i -ping -g address x.x.x.x; done
|
||||||
```
|
```
|
||||||
|
|
||||||
|
5. Check NPU TLS Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# The TLS settings should be consistent across all nodes
|
||||||
|
for i in {0..7}; do hccn_tool -i $i -tls -g ; done | grep switch
|
||||||
|
```
|
||||||
|
|
||||||
## Run with Docker
|
## Run with Docker
|
||||||
|
|
||||||
Start a Docker container on each node.
|
Start a Docker container on each node.
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ for i in {0..15}; do hccn_tool -i $i -netdetect -g ; done
|
|||||||
for i in {0..15}; do hccn_tool -i $i -gateway -g ; done
|
for i in {0..15}; do hccn_tool -i $i -gateway -g ; done
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Check NPU network configuration:
|
2. Check NPU HCCN Configuration:
|
||||||
|
|
||||||
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
||||||
|
|
||||||
@@ -48,14 +48,28 @@ cat /etc/hccn.conf
|
|||||||
3. Get NPU IP Addresses
|
3. Get NPU IP Addresses
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
for i in {0..15}; do hccn_tool -i $i -ip -g | grep ipaddr; done
|
# Get virtual npu ip
|
||||||
|
for i in {0..15}; do hccn_tool -i $i -vnic -g;done
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Cross-Node PING Test
|
4. Get superpodid and SDID
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Execute on the target node (replace 'x.x.x.x' with actual npu ip address)
|
for i in {0..15}; do npu-smi info -t spod-info -i $i -c 0;npu-smi info -t spod-info -i $i -c 1;done
|
||||||
for i in {0..15}; do hccn_tool -i $i -ping -g address x.x.x.x;done
|
```
|
||||||
|
|
||||||
|
5. Cross-Node PING Test
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Execute on the target node (replace 'x.x.x.x' with the virtual NPU IP address)
|
||||||
|
for i in {0..15}; do hccn_tool -i $i -hccs_ping -g address x.x.x.x;done
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Check NPU TLS Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# The TLS settings should be consistent across all nodes
|
||||||
|
for i in {0..15}; do hccn_tool -i $i -tls -g ; done | grep switch
|
||||||
```
|
```
|
||||||
|
|
||||||
::::
|
::::
|
||||||
@@ -79,7 +93,7 @@ for i in {0..7}; do hccn_tool -i $i -netdetect -g ; done
|
|||||||
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
|
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Check NPU network configuration:
|
2. Check NPU HCCN Configuration:
|
||||||
|
|
||||||
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
||||||
|
|
||||||
@@ -100,6 +114,13 @@ for i in {0..7}; do hccn_tool -i $i -ip -g;done
|
|||||||
for i in {0..7}; do hccn_tool -i $i -ping -g address x.x.x.x;done
|
for i in {0..7}; do hccn_tool -i $i -ping -g address x.x.x.x;done
|
||||||
```
|
```
|
||||||
|
|
||||||
|
5. Check NPU TLS Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# The TLS settings should be consistent across all nodes
|
||||||
|
for i in {0..7}; do hccn_tool -i $i -tls -g ; done | grep switch
|
||||||
|
```
|
||||||
|
|
||||||
::::
|
::::
|
||||||
|
|
||||||
:::::
|
:::::
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ for i in {0..7}; do hccn_tool -i $i -netdetect -g ; done
|
|||||||
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
|
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Check NPU network configuration:
|
2. Check NPU HCCN Configuration:
|
||||||
|
|
||||||
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
||||||
|
|
||||||
@@ -41,6 +41,20 @@ cat /etc/hccn.conf
|
|||||||
for i in {0..7}; do hccn_tool -i $i -ip -g;done
|
for i in {0..7}; do hccn_tool -i $i -ip -g;done
|
||||||
```
|
```
|
||||||
|
|
||||||
|
4. Cross-Node PING Test
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Execute on the target node (replace 'x.x.x.x' with the actual NPU IP address)
|
||||||
|
for i in {0..7}; do hccn_tool -i $i -ping -g address x.x.x.x;done
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Check NPU TLS Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# The TLS settings should be consistent across all nodes
|
||||||
|
for i in {0..7}; do hccn_tool -i $i -tls -g ; done | grep switch
|
||||||
|
```
|
||||||
|
|
||||||
## Run with Docker
|
## Run with Docker
|
||||||
Start a Docker container.
|
Start a Docker container.
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ export PYTHONHASHSEED=0
|
|||||||
|
|
||||||
## Example of using Mooncake as a KV Pool backend
|
## Example of using Mooncake as a KV Pool backend
|
||||||
* Software:
|
* Software:
|
||||||
* Check NPU network configuration:
|
* Check NPU HCCN Configuration:
|
||||||
|
|
||||||
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
Ensure that the hccn.conf file exists in the environment. If using Docker, mount it into the container.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user