[1/N][CI] Add multi node test (#3359)

### What this PR does / why we need it?
This pr purpose to add multi-node test, on the first step, add
`deepseek-v3` dp+tp+ep test
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-11 14:50:46 +08:00
committed by GitHub
parent 82b6c846ca
commit 9eb103607f
11 changed files with 897 additions and 1 deletions

View File

@@ -0,0 +1,142 @@
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: vllm
namespace: vllm-project
spec:
replicas: 1
leaderWorkerTemplate:
size: 2
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
containers:
- name: vllm-leader
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
env:
- name: VLLM_USE_MODELSCOPE
value: "true"
- name: WORKSPACE
value: "/root/workspace"
- name: WORLD_SIZE
value: "2"
- name: NPU_PER_NODE
value: "16"
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
- name: VLLM_VERSION
value: "v0.11.0"
- name: VLLM_ASCEND_VERSION
value: "main"
- name: MOONCAKE_VERSION
value: "06cc217504a6f1b0cdaa26b096b985651b262748"
command:
- sh
- -c
- |
bash /root/.cache/tests/run.sh
resources:
limits:
huawei.com/ascend-1980: "16"
memory: 512Gi
ephemeral-storage: 100Gi
requests:
huawei.com/ascend-1980: "16"
ephemeral-storage: 100Gi
cpu: 125
ports:
- containerPort: 8080
# readinessProbe:
# tcpSocket:
# port: 8080
# initialDelaySeconds: 15
# periodSeconds: 10
volumeMounts:
- mountPath: /root/.cache
name: shared-volume
- mountPath: /usr/local/Ascend/driver/tools
name: driver-tools
- mountPath: /dev/shm
name: dshm
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: 15Gi
- name: shared-volume
persistentVolumeClaim:
claimName: nv-action-vllm-benchmarks-v2
- name: driver-tools
hostPath:
path: /usr/local/Ascend/driver/tools
workerTemplate:
spec:
containers:
- name: vllm-worker
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
env:
- name: VLLM_USE_MODELSCOPE
value: "true"
- name: WORKSPACE
value: "/root/workspace"
- name: WORLD_SIZE
value: "2"
- name: NPU_PER_NODE
value: "16"
# Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
- name: VLLM_VERSION
value: "v0.11.0"
- name: VLLM_ASCEND_VERSION
value: "main"
- name: MOONCAKE_VERSION
value: "06cc217504a6f1b0cdaa26b096b985651b262748"
command:
- sh
- -c
- |
bash /root/.cache/tests/run.sh
resources:
limits:
huawei.com/ascend-1980: "16"
memory: 512Gi
ephemeral-storage: 100Gi
requests:
huawei.com/ascend-1980: "16"
ephemeral-storage: 100Gi
cpu: 125
volumeMounts:
- mountPath: /root/.cache
name: shared-volume
- mountPath: /usr/local/Ascend/driver/tools
name: driver-tools
- mountPath: /dev/shm
name: dshm
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: 15Gi
- name: shared-volume
persistentVolumeClaim:
claimName: nv-action-vllm-benchmarks-v2
- name: driver-tools
hostPath:
path: /usr/local/Ascend/driver/tools
---
apiVersion: v1
kind: Service
metadata:
name: vllm-leader
namespace: vllm-project
spec:
ports:
- name: http
port: 8080
protocol: TCP
targetPort: 8080
selector:
leaderworkerset.sigs.k8s.io/name: vllm
role: leader
type: ClusterIP

View File

@@ -0,0 +1,96 @@
#!/bin/bash
set -euo pipefail
export SRC_DIR="$WORKSPACE/source_code"
check_npu_info() {
echo "====> Check NPU info"
npu-smi info
cat "/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/ascend_toolkit_install.info"
}
check_and_config() {
echo "====> Configure mirrors and git proxy"
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf "https://github.com/"
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
}
checkout_src() {
echo "====> Checkout source code"
mkdir -p "$SRC_DIR"
# vllm-ascend
if [ ! -d "$SRC_DIR/vllm-ascend" ]; then
git clone --depth 1 -b $VLLM_ASCEND_VERSION https://github.com/vllm-project/vllm-ascend.git "$SRC_DIR/vllm-ascend"
fi
# vllm
if [ ! -d "$SRC_DIR/vllm" ]; then
git clone -b $VLLM_VERSION https://github.com/vllm-project/vllm.git "$SRC_DIR/vllm"
fi
#mooncake
if [ ! -d "$SRC_DIR/Mooncake" ]; then
git clone https://github.com/kvcache-ai/Mooncake.git "$SRC_DIR/Mooncake"
cd "$SRC_DIR/Mooncake"
git checkout 06cc217504a6f1b0cdaa26b096b985651b262748
cd -
fi
}
install_sys_dependencies() {
echo "====> Install system dependencies"
apt-get update -y
DEP_LIST=()
while IFS= read -r line; do
[[ -n "$line" && ! "$line" =~ ^# ]] && DEP_LIST+=("$line")
done < "$SRC_DIR/vllm-ascend/packages.txt"
apt-get install -y "${DEP_LIST[@]}" gcc g++ cmake libnuma-dev iproute2
}
install_vllm() {
echo "====> Install vllm and vllm-ascend"
VLLM_TARGET_DEVICE=empty pip install -e "$SRC_DIR/vllm"
pip install -e "$SRC_DIR/vllm-ascend"
pip install modelscope
# Install for pytest
pip install -r "$SRC_DIR/vllm-ascend/requirements-dev.txt"
}
install_mooncake() {
echo "====> Install mooncake"
apt-get update
apt install -y --allow-change-held-packages python3 python-is-python3
apt-get install -y --no-install-recommends mpich libmpich-dev
cd $SRC_DIR/Mooncake
sed -i '/option(USE_ASCEND_DIRECT)/s/OFF/ON/' mooncake-common/common.cmake
bash dependencies.sh --yes
mkdir build
cd -
cd $SRC_DIR/Mooncake/build
cmake ..
make -j
make install
cd -
}
run_tests() {
echo "====> Run tests"
cd "$SRC_DIR/vllm-ascend"
pytest -sv tests/e2e/multi_node/test_multi_dp.py
}
main() {
check_npu_info
check_and_config
checkout_src
install_sys_dependencies
install_vllm
#install_mooncake
run_tests
}
main "$@"