2025-06-27 00:18:56 -07:00
|
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the model served by every prefill and decode server below.
# Can be overridden from the environment; defaults to the Llama 3.1 8B path.
MODEL_PATH="${MODEL_PATH:-/raid/models/meta-llama/Llama-3.1-8B-Instruct}"
|
|
|
|
|
|
|
|
|
|
|
|
#######################################
# Print the name of the first InfiniBand device whose port is active.
# Probes mlx5_0 through mlx5_11 in order with ibv_devinfo and looks at the
# first "state:" line each device reports.
# Globals:   none
# Arguments: none
# Outputs:   the device name on stdout; an error message on stderr when no
#            probed device is active
# Returns:   0 if an active device was found, 1 otherwise
#######################################
find_active_ib_device() {
  local device info state

  for device in mlx5_{0..11}; do
    # Query each device once; a non-zero exit means it cannot be queried.
    info=$(ibv_devinfo "$device" 2>/dev/null) || continue

    # Only the first reported "state:" line is considered.
    state=$(awk '/state:/ { print $2; exit }' <<<"$info")

    if [[ "$state" == "PORT_ACTIVE" ]]; then
      echo "$device"
      return 0
    fi
  done

  echo "No active IB device found" >&2
  return 1
}
|
|
|
|
|
|
|
|
|
|
|
|
# Get the first available active IB device. Abort when there is none: every
# server below is launched with --disaggregation-ib-device "$DEVICE", and
# continuing with an empty value would only surface later as a health-check
# timeout.
if ! DEVICE=$(find_active_ib_device); then
  echo "Cannot launch disaggregation servers without an active IB device" >&2
  exit 1
fi
echo "Using IB device: $DEVICE"
|
|
|
|
|
|
|
|
|
|
|
|
# Start one prefill server per GPU (GPUs 0 through 3), each in the background.
# GPU i listens on 127.0.0.(i+1), port 30001+i, with bootstrap port 9001+i.
for i in 0 1 2 3; do
  HOST="127.0.0.$((i + 1))"
  PORT=$((i + 30001))
  BOOTSTRAP_PORT=$((i + 9001))
  echo "Launching PREFILL server on GPU $i at $HOST:$PORT (bootstrap: $BOOTSTRAP_PORT)"

  # Collect the server flags in an array so each value stays a single word.
  prefill_args=(
    --model-path "$MODEL_PATH"
    --disaggregation-mode prefill
    --host "$HOST"
    --port "$PORT"
    --disaggregation-ib-device "$DEVICE"
    --disaggregation-bootstrap-port "$BOOTSTRAP_PORT"
  )

  # Each process is restricted to its own GPU through CUDA_VISIBLE_DEVICES.
  CUDA_VISIBLE_DEVICES=$i python3 -m sglang.launch_server "${prefill_args[@]}" &
done
|
|
|
|
|
|
|
|
|
|
|
|
# Start one decode server per GPU (GPUs 4 through 7), each in the background.
# GPU i listens on 127.0.0.(i+1), port 30001+i.
for i in 4 5 6 7; do
  HOST="127.0.0.$((i + 1))"
  PORT=$((i + 30001))
  echo "Launching DECODE server on GPU $i at $HOST:$PORT"

  # Collect the server flags in an array so each value stays a single word.
  # NOTE(review): --base-gpu-id 0 presumably refers to the single GPU left
  # visible by CUDA_VISIBLE_DEVICES below; confirm against the sglang docs.
  decode_args=(
    --model-path "$MODEL_PATH"
    --disaggregation-mode decode
    --host "$HOST"
    --port "$PORT"
    --disaggregation-ib-device "$DEVICE"
    --base-gpu-id 0
  )

  # Each process is restricted to its own GPU through CUDA_VISIBLE_DEVICES.
  CUDA_VISIBLE_DEVICES=$i python3 -m sglang.launch_server "${decode_args[@]}" &
done
|
|
|
|
|
|
|
|
|
|
|
|
# Wait for disaggregation servers to initialize
echo "Waiting for disaggregation servers to initialize..."

# Health check with 5-minute timeout.
# Polls GET /health on all 8 servers (127.0.0.1-8, ports 30001-30008) every
# 10 seconds. Leaves the loop once all 8 answer successfully in the same
# round; exits the script with status 1 once TIMEOUT seconds have elapsed.
TIMEOUT=300
START_TIME=$(date +%s)

echo "Checking health of all 8 servers..."
while true; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))

  if ((ELAPSED >= TIMEOUT)); then
    echo "❌ Timeout: Servers did not become healthy within 5 minutes"
    exit 1
  fi

  HEALTHY_COUNT=0
  # Check all 8 servers (127.0.0.1-8:30001-30008)
  for i in {1..8}; do
    # Bound every probe. Without a limit, a server that accepts the
    # connection but never responds would block curl indefinitely and the
    # overall TIMEOUT above would never be re-evaluated.
    if curl -s -f --connect-timeout 2 --max-time 5 \
      "http://127.0.0.$i:$((30000 + i))/health" >/dev/null 2>&1; then
      HEALTHY_COUNT=$((HEALTHY_COUNT + 1))
    fi
  done

  echo "Healthy servers: $HEALTHY_COUNT/8 (elapsed: ${ELAPSED}s)"

  if ((HEALTHY_COUNT == 8)); then
    echo "✅ All 8 servers are healthy!"
    break
  fi

  sleep 10 # Wait 10 seconds before next check
done
|
|
|
|
|
|
|
2025-07-18 14:24:24 -07:00
|
|
|
|
# No router is launched from this script; it only keeps the servers running.
printf '%s\n' "✅ All disaggregation servers are ready and waiting for router connections"

# Block here until every background server process has exited, which keeps
# the script (and therefore its child servers) in the foreground.
wait
|