#!/usr/bin/env bash
#
# CI E2E bootstrap: start the test docker container with GPU device
# mappings and NVIDIA driver library mounts, then prepare the conda
# environment inside it.
#
# Required env (exported by ci/scripts/common/env.sh):
#   DOCKER_NAME, IMAGE_NAME, WORKSPACE_MOUNT, CONDA_ENV

set -euo pipefail

# Shared CI helpers: env.sh exports the variables above, log.sh defines log().
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

# Fail fast with a clear message if the sourced env is incomplete
# (set -u alone would not catch empty-but-set values).
: "${DOCKER_NAME:?must be set by ci/scripts/common/env.sh}"
: "${IMAGE_NAME:?must be set by ci/scripts/common/env.sh}"
: "${WORKSPACE_MOUNT:?must be set by ci/scripts/common/env.sh}"
: "${CONDA_ENV:?must be set by ci/scripts/common/env.sh}"

log "Starting docker container: ${DOCKER_NAME}"
# Remove any stale container with the same name so the docker run below
# cannot collide with a leftover from a previous CI job.
# -F/-x: match the name as a fixed whole-line string — with the original
# `grep -q "^name$"`, a name containing regex metacharacters (e.g. '.')
# could match the wrong container.
if docker ps -a --format '{{.Names}}' | grep -Fxq "${DOCKER_NAME}"; then
  log "Container exists, removing first..."
  # Best-effort teardown: the container may already be stopped or gone.
  docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true
  docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true
fi
# Locate the CUDA runtime libraries on the host, preferring the unversioned
# /usr/local/cuda symlink over versioned installs; first match wins.
HOST_CUDA_LIB_PATH=""
for path in "/usr/local/cuda/lib64" /usr/local/cuda-*/lib64; do
  # If the glob has no match, the literal pattern is iterated and the -d
  # test simply fails, so no nullglob handling is required here.
  if [ -d "$path" ]; then
    HOST_CUDA_LIB_PATH="$path"
    break
  fi
done

# NOTE(review): currently only logged; nothing below consumes
# HOST_CUDA_LIB_PATH — confirm whether a mount for it was intended.
if [ -n "${HOST_CUDA_LIB_PATH}" ]; then
  log "Detected host CUDA lib path: ${HOST_CUDA_LIB_PATH}"
else
  log "Host CUDA lib path not found, will use container CUDA"
fi
# NVIDIA device mapping
# Build the --device arguments for every NVIDIA device node present on the
# host: GPU nodes /dev/nvidia0..16 plus the uvm/modeset control devices.
DEVICE_ARGS=""
if [ -e "/dev/nvidia0" ]; then
  DEVICE_ARGS="--device /dev/nvidia0:/dev/nvidia0"
  # Additional GPUs on multi-GPU hosts; {1..16} avoids spawning seq.
  for i in {1..16}; do
    if [ -e "/dev/nvidia${i}" ]; then
      DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia${i}:/dev/nvidia${i}"
    fi
  done
  # Control devices used by the CUDA driver (unified memory, modeset);
  # mapped only when present, same as the original per-device ifs.
  for dev in /dev/nvidia-uvm /dev/nvidia-modeset; do
    if [ -e "${dev}" ]; then
      DEVICE_ARGS="${DEVICE_ARGS} --device ${dev}:${dev}"
    fi
  done
else
  log "WARNING: /dev/nvidia0 not found, GPU may not be available"
fi
# Mount nvidia-smi
# The image does not ship the driver utilities, so bind-mount the host
# binary when it exists; otherwise run without it and just warn.
NVIDIA_BIN=""
if [ -f "/usr/bin/nvidia-smi" ]; then
  NVIDIA_BIN="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
  log "Added nvidia-smi mount"
else
  log "WARNING: nvidia-smi not found on host"
fi
# Mount critical NVIDIA libs
# Driver-version-matched libraries must come from the host; only mount the
# ones that actually exist so docker does not materialize bogus paths.
NVIDIA_LIBS=""
if [ -d "/usr/lib64" ]; then
  for lib in libnvidia-ml.so libnvidia-ml.so.1; do
    if [ -f "/usr/lib64/${lib}" ]; then
      NVIDIA_LIBS="${NVIDIA_LIBS} -v /usr/lib64/${lib}:/usr/lib64/${lib}"
    fi
  done
fi

# Ensure libcuda symlink
# Guarded: an unconditional ln -sf would create a dangling
# /usr/lib64/libcuda.so on hosts without the driver installed.
if [ -e "/usr/lib64/libcuda.so.1" ]; then
  ln -sf /usr/lib64/libcuda.so.1 /usr/lib64/libcuda.so || true
fi
# The PTX JIT compiler lib is optional (depends on driver version); mount
# it only when present.  The original attached an inline `2>/dev/null` to
# this mount argument, which actually redirected docker run's entire
# stderr — hiding every container startup error.
PTXJIT_MOUNT=""
if [ -f "/usr/lib64/libnvidia-ptxjitcompiler.so.1" ]; then
  PTXJIT_MOUNT="-v /usr/lib64/libnvidia-ptxjitcompiler.so.1:/usr/lib64/libnvidia-ptxjitcompiler.so.1"
fi

log "docker run ${IMAGE_NAME}"
# DEVICE_ARGS/NVIDIA_BIN/NVIDIA_LIBS/PTXJIT_MOUNT are intentionally
# unquoted: each holds zero or more whitespace-separated docker options.
docker run \
  -h "$(hostname)" \
  --privileged \
  --net=host \
  --user=root \
  --name="${DOCKER_NAME}" \
  -v /home:/home \
  -v "${WORKSPACE_MOUNT}" \
  -v /ssd2:/ssd2 \
  -v /ssd1:/ssd1 \
  -v /ssd3:/ssd3 \
  -v /dev/shm:/dev/shm \
  -v /usr/lib64/libcuda.so.1:/usr/lib64/libcuda.so.1 \
  -v /usr/lib64/libcuda.so:/usr/lib64/libcuda.so \
  -v /usr/lib64/libnvidia-ml.so.1:/usr/lib64/libnvidia-ml.so.1 \
  -v /var/run/docker.sock:/var/run/docker.sock \
  -w /workspace \
  ${DEVICE_ARGS} \
  ${NVIDIA_BIN} \
  ${NVIDIA_LIBS} \
  ${PTXJIT_MOUNT} \
  --shm-size=16G \
  -e NVIDIA_VISIBLE_DEVICES=all \
  -e NVIDIA_DRIVER_CAPABILITIES=compute,utility \
  -itd "${IMAGE_NAME}"
log "Container started. Inject conda activate into bashrc"
# ${CONDA_ENV} is expanded on the HOST (double-quoted command string), so
# the literal env name is baked into the container's ~/.bashrc and picked
# up by subsequent interactive/login shells in the container.
docker exec "${DOCKER_NAME}" bash -lc "
echo 'conda activate ${CONDA_ENV}' >> ~/.bashrc
conda env list || true
"