Reorganize CI and test files (#9027)
This commit is contained in:
43
scripts/ci/amd_ci_exec.sh
Executable file
43
scripts/ci/amd_ci_exec.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
# Run a command inside the ci_sglang container with the AMD CI environment set.
# Usage: amd_ci_exec.sh [-w|--workdir DIR] [-e KEY=VAL]... [--] COMMAND [ARGS...]
set -euo pipefail

# Default working directory inside the container.
WORKDIR="/sglang-checkout/test/srt"

# Environment variables always exported into the container; -e can add or
# override entries (an associative array keeps the last value per key).
declare -A ENV_MAP=(
  [SGLANG_AMD_CI]=1
  [SGLANG_IS_IN_CI]=1
  [SGLANG_USE_AITER]=1
)

# Parse -w/--workdir and -e ENV=VAL. Everything after "--" (or the first
# unrecognized token) is treated as the command to execute.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -w|--workdir)
      # Validate explicitly: under `set -u` a missing "$2" would otherwise
      # abort with an opaque "unbound variable" error.
      [[ $# -ge 2 ]] || { echo "Error: $1 requires a directory argument" >&2; exit 1; }
      WORKDIR="$2"
      shift 2
      ;;
    -e)
      [[ $# -ge 2 ]] || { echo "Error: -e requires a KEY=VAL argument" >&2; exit 1; }
      # Split on the first '='; the remainder (including further '=') goes
      # into val, so values containing '=' are preserved.
      IFS="=" read -r key val <<< "$2"
      ENV_MAP["$key"]="$val"
      shift 2
      ;;
    --)
      shift
      break
      ;;
    *)
      break
      ;;
  esac
done

# Build the final list of -e arguments for docker exec.
ENV_ARGS=()
for key in "${!ENV_MAP[@]}"; do
  ENV_ARGS+=("-e" "$key=${ENV_MAP[$key]}")
done

# Run the remaining command inside the container.
docker exec \
  -w "$WORKDIR" \
  "${ENV_ARGS[@]}" \
  ci_sglang "$@"
|
||||
22
scripts/ci/amd_ci_install_dependency.sh
Executable file
22
scripts/ci/amd_ci_install_dependency.sh
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
# Install the required dependencies inside the ci_sglang container for AMD CI.
set -euo pipefail

# Install the required dependencies in CI.
docker exec ci_sglang pip install --upgrade pip
docker exec ci_sglang pip uninstall sgl-kernel -y || true
# Build sgl-kernel from source against ROCm by swapping in the ROCm pyproject.
docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
docker exec ci_sglang pip install -e "python[dev_hip]"

# Human-eval benchmark used by some CI tests.
docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
docker exec -w /human-eval ci_sglang pip install -e .

# For lmms_evals evaluating MMMU
docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
docker exec -w /lmms-eval ci_sglang pip install -e .

# Stage a dummy Grok config on the host, then copy it into the container.
docker exec -w / ci_sglang mkdir -p /dummy-grok
mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
docker cp ./dummy-grok ci_sglang:/

# Quoted: unquoted [hf_xet] is a shell glob pattern on the host and can
# expand or fail depending on shell options and matching files.
docker exec ci_sglang pip install "huggingface_hub[hf_xet]"
docker exec ci_sglang pip install pytest
|
||||
158
scripts/ci/amd_ci_start_container.sh
Executable file
158
scripts/ci/amd_ci_start_container.sh
Executable file
@@ -0,0 +1,158 @@
|
||||
#!/bin/bash
# Start the AMD CI container (ci_sglang), selecting the newest available
# dated rocm/sgl-dev image for this runner's GPU architecture (mi30x/mi35x),
# with a pinned fallback image when no dated tag is found.
set -euo pipefail

# Get version from SGLang version.py file
FALLBACK_SGLANG_VERSION="v0.4.10.post2"
SGLANG_VERSION_FILE="$(dirname "$0")/../../python/sglang/version.py"

if [ -f "$SGLANG_VERSION_FILE" ]; then
  # Extract __version__ from version.py and prefix it with "v".
  # Prints nothing when the pattern is not found, which the emptiness
  # check below turns into the fallback version.
  SGLANG_VERSION=$(python3 -c '
import re, sys
with open(sys.argv[1], "r") as f:
    content = f.read()
match = re.search(r"__version__\s*=\s*[\"'"'"'](.*?)[\"'"'"']", content)
if match:
    print("v" + match.group(1))
' "$SGLANG_VERSION_FILE")

  if [ -z "$SGLANG_VERSION" ]; then
    SGLANG_VERSION="$FALLBACK_SGLANG_VERSION"
    echo "Warning: Could not parse version from $SGLANG_VERSION_FILE, using fallback version: $SGLANG_VERSION" >&2
  fi
else
  # Fallback version if file is not found
  SGLANG_VERSION="$FALLBACK_SGLANG_VERSION"
  echo "Warning: version.py not found, using fallback version: $SGLANG_VERSION" >&2
fi

echo "Using SGLang version: $SGLANG_VERSION"

# Default base tags (can be overridden by command line arguments)
DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x"
DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x"

# Parse command line arguments
MI30X_BASE_TAG="$DEFAULT_MI30X_BASE_TAG"
MI35X_BASE_TAG="$DEFAULT_MI35X_BASE_TAG"

while [[ $# -gt 0 ]]; do
  case $1 in
    --mi30x-base-tag)
      MI30X_BASE_TAG="$2"
      shift 2
      ;;
    --mi35x-base-tag)
      MI35X_BASE_TAG="$2"
      shift 2
      ;;
    -h|--help)
      echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]"
      echo " --mi30x-base-tag TAG Base tag for mi30x images (default: $DEFAULT_MI30X_BASE_TAG)"
      echo " --mi35x-base-tag TAG Base tag for mi35x images (default: $DEFAULT_MI35X_BASE_TAG)"
      exit 0
      ;;
    *)
      echo "Unknown option $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
done

# Set up DEVICE_FLAG based on Kubernetes pod info
# (the pod info file contains pre-rendered --device flags for the GPUs
# assigned to this pod; outside Kubernetes, fall back to /dev/dri).
if [ -f "/etc/podinfo/gha-render-devices" ]; then
  DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
else
  DEVICE_FLAG="--device /dev/dri"
fi

# Function to find latest available image for a given GPU architecture.
# Walks backwards up to 30 days looking for a dated tag.
# Protocol: progress/diagnostics go to stderr; the ONLY stdout output is the
# final image reference, so callers can capture it via command substitution.
find_latest_image() {
  local gpu_arch=$1
  local base_tag

  if [ "$gpu_arch" == "mi30x" ]; then
    base_tag="$MI30X_BASE_TAG"
  elif [ "$gpu_arch" == "mi35x" ]; then
    base_tag="$MI35X_BASE_TAG"
  else
    echo "Error: Unsupported GPU architecture '$gpu_arch'" >&2
    return 1
  fi

  local days_back=0

  while [ $days_back -lt 30 ]; do
    local check_date=$(date -d "$days_back days ago" +%Y%m%d)
    local image_tag="${base_tag}-${check_date}"

    echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2

    # Check if the image exists by trying to get its manifest
    if docker manifest inspect "rocm/sgl-dev:${image_tag}" >/dev/null 2>&1; then
      echo "Found available image: rocm/sgl-dev:${image_tag}" >&2
      echo "rocm/sgl-dev:${image_tag}"
      return 0
    fi

    days_back=$((days_back + 1))
  done

  echo "Error: No ${gpu_arch} image found in the last 30 days" >&2
  return 1
}

# Determine image finder and fallback based on runner
# In Kubernetes, the hostname contains the GPU type (e.g., linux-mi300-gpu-1-bgg8r-runner-vknlb)
# Extract the GPU type from hostname
HOSTNAME_VALUE=$(hostname)
RUNNER_NAME="unknown"

if [[ "${HOSTNAME_VALUE}" =~ ^(linux-mi[0-9]+-gpu-[0-9]+) ]]; then
  RUNNER_NAME="${BASH_REMATCH[1]}"
  echo "Extracted runner from hostname: ${RUNNER_NAME}"
else
  echo "Could not extract runner info from hostname: ${HOSTNAME_VALUE}"
fi

echo "The runner is: ${RUNNER_NAME}"
# Defaults assume mi30x; overridden below for mi35x-class runners.
GPU_ARCH="mi30x"
FALLBACK_IMAGE="rocm/sgl-dev:${MI30X_BASE_TAG}-20250715"
FALLBACK_MSG="No mi30x image found in last 30 days, using fallback image"

# Check for mi350/mi355 runners
if [[ "${RUNNER_NAME}" =~ ^linux-mi350-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi355-gpu-[0-9]+$ ]]; then
  echo "Runner is ${RUNNER_NAME}, will find mi35x image."
  GPU_ARCH="mi35x"
  FALLBACK_IMAGE="rocm/sgl-dev:${MI35X_BASE_TAG}-20250715"
  FALLBACK_MSG="No mi35x image found in last 30 days, using fallback image"
# Check for mi300/mi325 runners
elif [[ "${RUNNER_NAME}" =~ ^linux-mi300-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi325-gpu-[0-9]+$ ]]; then
  echo "Runner is ${RUNNER_NAME}, will find mi30x image."
else
  echo "Runner type not recognized: '${RUNNER_NAME}'"
  echo "Defaulting to find mi30x image"
fi

# Find and pull the latest image
if IMAGE=$(find_latest_image "${GPU_ARCH}"); then
  echo "Pulling Docker image: $IMAGE"
else
  echo "$FALLBACK_MSG" >&2
  IMAGE="$FALLBACK_IMAGE"
  echo "Pulling fallback Docker image: $IMAGE"
fi
docker pull "$IMAGE"

# Run the container
echo "Starting container: ci_sglang"
# NOTE: $DEVICE_FLAG is intentionally unquoted — it may hold several
# whitespace-separated "--device ..." flags that must word-split.
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
  -v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
  --ipc=host --group-add video \
  --shm-size 32g \
  --cap-add=SYS_PTRACE \
  -e HF_TOKEN="${HF_TOKEN:-}" \
  --security-opt seccomp=unconfined \
  -w /sglang-checkout \
  --name ci_sglang \
  "$IMAGE"
|
||||
68
scripts/ci/ci_install_deepep.sh
Executable file
68
scripts/ci/ci_install_deepep.sh
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/bin/bash
# Install the dependency in CI: GDRCopy, NVSHMEM, and DeepEP (skipped when
# deep_ep is already importable).
set -euxo pipefail

bash scripts/ci/ci_install_dependency.sh

export GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
export NVSHMEM_DIR=/opt/nvshmem/install
# ${LD_LIBRARY_PATH:-}: LD_LIBRARY_PATH is often unset in fresh containers;
# a bare $LD_LIBRARY_PATH would abort the script under `set -u`.
export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:${LD_LIBRARY_PATH:-}"
export PATH="${NVSHMEM_DIR}/bin:$PATH"
export CUDA_HOME=/usr/local/cuda

# Skip the (slow) build when deep_ep is already importable.
if python3 -c "import deep_ep" >/dev/null 2>&1; then
  echo "deep_ep is already installed or importable. Skipping installation."
  exit 0
fi

# Install system dependencies
apt install -y curl wget git sudo libibverbs-dev rdma-core infiniband-diags openssh-server perftest ibverbs-providers libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1 build-essential cmake

# Install GDRCopy
rm -rf /opt/gdrcopy && mkdir -p /opt/gdrcopy
rm -rf /opt/nvshmem && mkdir -p /opt/nvshmem
cd /opt/gdrcopy
git clone https://github.com/NVIDIA/gdrcopy.git .
git checkout v2.4.4
apt update
apt install -y nvidia-dkms-535
apt install -y build-essential devscripts debhelper fakeroot pkg-config dkms
apt install -y check libsubunit0 libsubunit-dev python3-venv
# Build and install the GDRCopy Debian packages (driver, library, tests).
cd packages
CUDA=/usr/local/cuda ./build-deb-packages.sh
dpkg -i gdrdrv-dkms_*.deb
dpkg -i libgdrapi_*.deb
dpkg -i gdrcopy-tests_*.deb
dpkg -i gdrcopy_*.deb

# NVSHMEM's build links against libmlx5.so; provide the unversioned symlink
# when only the runtime .so.1 is present.
if [ ! -e "/usr/lib/x86_64-linux-gnu/libmlx5.so" ]; then
  ln -s /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
fi
apt-get update && apt-get install -y libfabric-dev

# Install NVSHMEM
cd /opt/nvshmem
wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz
mv nvshmem_src nvshmem && cd nvshmem
# Minimal feature set: only IBGDA + GDRCopy support is needed for DeepEP.
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/opt/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90
cd build
make -j$(nproc) install

# Install DeepEP (pinned to a known-good commit).
rm -rf /root/.cache/deepep && git clone https://github.com/deepseek-ai/DeepEP.git /root/.cache/deepep && cd /root/.cache/deepep && git checkout b6ce310bb0b75079682d09bc2ebc063a074fbd58
cd /root/.cache/deepep && python3 setup.py install

# Verify configuration
echo "=== Verify GDRCOPY ==="
gdrcopy_copybw
echo "=== Verify NVSHMEM ==="
nvshmem-info -a
|
||||
72
scripts/ci/ci_install_dependency.sh
Executable file
72
scripts/ci/ci_install_dependency.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/bin/bash
# Install the dependency in CI. Supports two runner flavors:
#   IS_BLACKWELL=1 -> plain pip with --break-system-packages, cu129 wheels
#   otherwise      -> uv (faster), cu126 wheels
set -euxo pipefail

IS_BLACKWELL=${IS_BLACKWELL:-0}

if [ "$IS_BLACKWELL" = "1" ]; then
  CU_VERSION="cu129"
else
  CU_VERSION="cu126"
fi

# Kill existing processes
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
bash "${SCRIPT_DIR}/../killall_sglang.sh"

# Install apt packages
apt install -y git libnuma-dev

# Install uv
if [ "$IS_BLACKWELL" = "1" ]; then
  # The blackwell CI runner has some issues with pip and uv,
  # so we can only use pip with `--break-system-packages`
  PIP_CMD="pip"
  PIP_INSTALL_SUFFIX="--break-system-packages"

  # Clean up existing installations
  $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true
else
  # In normal cases, we use uv, which is much faster than pip.
  pip install --upgrade pip
  pip install uv
  export UV_SYSTEM_PYTHON=true

  PIP_CMD="uv pip"
  PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"

  # Clean up existing installations
  $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true
fi

# Install the main package
$PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX

if [ "$IS_BLACKWELL" = "1" ]; then
  # TODO auto determine sgl-kernel version
  SGL_KERNEL_VERSION=0.3.2
  $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}-cp39-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
fi

# Show current packages
$PIP_CMD list

# Install additional dependencies.
# "huggingface_hub[hf_xet]" is quoted: unquoted [hf_xet] is a shell glob
# pattern and can expand or fail depending on shell options/matching files.
$PIP_CMD install mooncake-transfer-engine==0.3.5 nvidia-cuda-nvrtc-cu12 py-spy "huggingface_hub[hf_xet]" $PIP_INSTALL_SUFFIX

if [ "$IS_BLACKWELL" != "1" ]; then
  # For lmms_evals evaluating MMMU
  git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
  $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX

  # Install xformers
  $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
fi

# Install FlashMLA for attention backend tests
# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX

# Show current packages
$PIP_CMD list

echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
|
||||
23
scripts/ci/ci_install_rust.sh
Executable file
23
scripts/ci/ci_install_rust.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Install the Rust toolchain (via rustup) plus the system libraries needed to
# build the router in CI.
set -euxo pipefail

# Check if sudo is available (CI images may run as root without sudo).
if command -v sudo >/dev/null 2>&1; then
  sudo apt-get update
  sudo apt-get install -y libssl-dev pkg-config
else
  apt-get update
  apt-get install -y libssl-dev pkg-config
fi

# Install rustup (Rust installer and version manager); -y accepts defaults
# non-interactively.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Load the cargo environment into this shell so rustc/cargo are on PATH.
# (Previously sourced twice — once unquoted; a single quoted source suffices.)
. "$HOME/.cargo/env"

# Verify installation
rustc --version
cargo --version
|
||||
94
scripts/ci/ci_start_disaggregation_servers.sh
Executable file
94
scripts/ci/ci_start_disaggregation_servers.sh
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/bin/bash
# Launch 4 prefill + 4 decode disaggregation servers (GPUs 0-3 / 4-7) on
# loopback aliases 127.0.0.1-8, wait until all are healthy, then block.

MODEL_PATH="/raid/models/meta-llama/Llama-3.1-8B-Instruct"

# Function to find the first available active IB device.
# Prints the device name on stdout; returns 1 when none is active.
find_active_ib_device() {
  local device state
  for device in mlx5_{0..11}; do
    if ibv_devinfo "$device" >/dev/null 2>&1; then
      state=$(ibv_devinfo "$device" | grep "state:" | head -1 | awk '{print $2}')
      if [[ "$state" == "PORT_ACTIVE" ]]; then
        echo "$device"
        return 0
      fi
    fi
  done
  echo "No active IB device found" >&2
  return 1
}

# Get the first available active IB device. The failure status must be
# checked explicitly: without it the script would silently continue and
# launch every server with an empty --disaggregation-ib-device.
if ! DEVICE=$(find_active_ib_device); then
  exit 1
fi
echo "Using IB device: $DEVICE"

# Launch prefill servers on GPU 0-3 (ports 30001-30004, bootstrap 9001-9004).
for i in {0..3}; do
  PORT=$((30001 + i))
  BOOTSTRAP_PORT=$((9001 + i))
  HOST="127.0.0.$((i + 1))"
  echo "Launching PREFILL server on GPU $i at $HOST:$PORT (bootstrap: $BOOTSTRAP_PORT)"
  CUDA_VISIBLE_DEVICES=$i \
  python3 -m sglang.launch_server \
    --model-path "$MODEL_PATH" \
    --disaggregation-mode prefill \
    --host "$HOST" \
    --port "$PORT" \
    --disaggregation-ib-device "$DEVICE" \
    --disaggregation-bootstrap-port "$BOOTSTRAP_PORT" &
done

# Launch decode servers on GPU 4-7 (ports 30005-30008).
for i in {4..7}; do
  PORT=$((30001 + i))
  HOST="127.0.0.$((i + 1))"
  echo "Launching DECODE server on GPU $i at $HOST:$PORT"
  CUDA_VISIBLE_DEVICES=$i \
  python3 -m sglang.launch_server \
    --model-path "$MODEL_PATH" \
    --disaggregation-mode decode \
    --host "$HOST" \
    --port "$PORT" \
    --disaggregation-ib-device "$DEVICE" \
    --base-gpu-id 0 &
done

# Wait for disaggregation servers to initialize
echo "Waiting for disaggregation servers to initialize..."

# Health check with 5-minute timeout
TIMEOUT=300
START_TIME=$(date +%s)

echo "Checking health of all 8 servers..."
while true; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))

  if [ $ELAPSED -ge $TIMEOUT ]; then
    echo "❌ Timeout: Servers did not become healthy within 5 minutes"
    exit 1
  fi

  HEALTHY_COUNT=0
  # Check all 8 servers (127.0.0.1-8:30001-30008)
  for i in {1..8}; do
    if curl -s -f "http://127.0.0.$i:$((30000 + i))/health" >/dev/null 2>&1; then
      HEALTHY_COUNT=$((HEALTHY_COUNT + 1))
    fi
  done

  echo "Healthy servers: $HEALTHY_COUNT/8 (elapsed: ${ELAPSED}s)"

  if [ $HEALTHY_COUNT -eq 8 ]; then
    echo "✅ All 8 servers are healthy!"
    break
  else
    sleep 10 # Wait 10 seconds before next check
  fi
done

# Don't launch router here - just keep servers running
echo "✅ All disaggregation servers are ready and waiting for router connections"

# Keep the script running until every background server exits.
wait
|
||||
59
scripts/ci/npu_ci_install_dependency.sh
Executable file
59
scripts/ci/npu_ci_install_dependency.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
# Install CI dependencies for the Ascend NPU runner: point apt/pip at the
# in-cluster cache, install build tooling, then MemFabricV2, vLLM (empty
# device target), PyTorch + torch_npu, Triton-Ascend, and finally SGLang.
set -euo pipefail

# In-cluster caching proxy used for both apt and pip traffic.
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
# NOTE: intentionally unquoted at use sites so it word-splits into
# "pip install --no-cache-dir".
PIP_INSTALL="pip install --no-cache-dir"


# Update apt & pip sources
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}


# Install the required dependencies in CI.
apt update -y && apt install -y \
    build-essential \
    cmake \
    wget \
    curl \
    net-tools \
    zlib1g-dev \
    lld \
    clang \
    locales \
    ccache \
    ca-certificates
update-ca-certificates
python3 -m ${PIP_INSTALL} --upgrade pip


### Download MemFabricV2
MF_WHL_NAME="mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/${MF_WHL_NAME}"
wget "${MEMFABRIC_URL}" && ${PIP_INSTALL} "./${MF_WHL_NAME}"


### Install vLLM
# VLLM_TARGET_DEVICE="empty" builds vLLM without device-specific kernels;
# the subshell keeps the cwd unchanged for the following steps.
VLLM_TAG=v0.8.5
git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG
(cd vllm && VLLM_TARGET_DEVICE="empty" ${PIP_INSTALL} -v -e .)


### Install PyTorch and PTA
# CPU PyTorch wheels plus the matching torch_npu (Ascend PyTorch Adapter).
PYTORCH_VERSION=2.6.0
TORCHVISION_VERSION=0.21.0
PTA_VERSION=2.6.0
${PIP_INSTALL} torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu
${PIP_INSTALL} torch_npu==$PTA_VERSION


### Install Triton-Ascend
# Pinned build-time/runtime deps first, then the Triton-Ascend wheel itself.
TRITON_ASCEND_NAME="triton_ascend-3.2.0.dev20250729-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
TRITON_ASCEND_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/${TRITON_ASCEND_NAME}"
${PIP_INSTALL} attrs==24.2.0 numpy==1.26.4 scipy==1.13.1 decorator==5.1.1 psutil==6.0.0 pytest==8.3.2 pytest-xdist==3.6.1 pyyaml pybind11
wget "${TRITON_ASCEND_URL}" && ${PIP_INSTALL} "./${TRITON_ASCEND_NAME}"


### Install SGLang
${PIP_INSTALL} -v -e "python[srt_npu]"
|
||||
Reference in New Issue
Block a user