[CI/Build] Add CI end-to-end (E2E) tests (#139)

* [CI/Build] Add CI end-to-end (E2E) tests
Signed-off-by: Chenchao Hu <huchenchao@example.com>
Author: 1916hcc
Date: 2026-01-28 19:30:55 +08:00 (committed by GitHub)
parent c37ee19e3d
commit 7c2966a98c
12 changed files with 573 additions and 0 deletions

.github/workflows/_e2e_singlecard.yml

@@ -0,0 +1,141 @@
name: e2e-test

on:
  workflow_call:
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened]
  push:
    branches: [main]

concurrency:
  group: e2e-singlecard
  cancel-in-progress: false

jobs:
  e2e:
    name: e2e-test-singlecard
    runs-on:
      - self-hosted
      - Linux
      - X64
    steps:
      - name: Checkout PR code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Verify PR workspace
        run: |
          echo "===== WORKSPACE ====="
          pwd
          ls -l
          echo "===== GIT INFO ====="
          git rev-parse HEAD
          git log -1 --oneline
          git status --porcelain
      - name: Start docker
        run: |
          bash ci/scripts/docker/start_docker.sh
      - name: Install environment
        run: |
          bash ci/scripts/env/install_env.sh
      - name: Start vLLM server
        run: |
          bash ci/scripts/server/start_vllm.sh
      - name: Wait for vLLM ready
        run: |
          bash ci/scripts/server/wait_vllm.sh
      - name: Accuracy testing
        run: |
          bash ci/scripts/tests/run_accuracy.sh
      - name: Performance testing
        run: |
          docker exec aiak-e2e-singlecard bash -lc '
            source ci/scripts/common/env.sh
            source ci/scripts/common/log.sh
            # ==========================================
            # 1. Define test dimensions (input_len x output_len);
            #    extend by adding entries, e.g. "2048x2048"
            # ==========================================
            DIMENSIONS=("1024x1024")
            # ==========================================
            # 2. Define concurrency levels; extend with seq
            #    (seq [start] [step] [end]) for denser ranges
            # ==========================================
            CONCURRENCIES=(1)
            # ==========================================
            # 3. Assemble test cases: outer loop over concurrency
            #    (batch size), inner loop over dimensions
            # ==========================================
            TEST_COMBINATIONS=()
            for bs in "${CONCURRENCIES[@]}"; do
              for dim in "${DIMENSIONS[@]}"; do
                TEST_COMBINATIONS+=("${bs}x${dim}")
              done
            done
            # ==========================================
            # 4. Print generated cases for a sanity check
            # ==========================================
            echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
            echo "${TEST_COMBINATIONS[@]}"
            # Progress counters
            TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
            CURRENT_TEST=0
            # Iterate over all test combinations
            for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
              # Parse "concurrency x input_len x output_len"
              NUM_PROMPTS=$(echo "$COMBINATION" | cut -dx -f1)
              INPUT_LEN=$(echo "$COMBINATION" | cut -dx -f2)
              OUTPUT_LEN=$(echo "$COMBINATION" | cut -dx -f3)
              # Update progress
              CURRENT_TEST=$((CURRENT_TEST + 1))
              echo "=========================================================="
              echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
              echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
              echo "=========================================================="
              #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
              # Run benchmark
              python3 -m vllm.entrypoints.cli.main bench serve \
                --host 127.0.0.1 \
                --port ${VLLM_PORT:-8356} \
                --backend vllm \
                --model ${SERVED_MODEL_NAME:-Qwen3-8B} \
                --dataset-name random \
                --num-prompts $NUM_PROMPTS \
                --random-input-len $INPUT_LEN \
                --random-output-len $OUTPUT_LEN \
                --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \
                --ignore-eos
            done
          '
      - name: Set permissions
        if: always()
        run: |
          bash ci/scripts/docker/set_permissions.sh
      - name: Cleanup docker
        if: always()
        run: |
          bash ci/scripts/docker/stop_docker.sh
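Note: the comments in the performance step above hint at extending the matrix with seq. A sketch of what a denser sweep could look like; the exact ranges and the second shape are illustrative assumptions, not part of this commit:

    # Hypothetical extension: denser concurrency sweep plus a second shape
    CONCURRENCIES=($(seq 1 1 8) $(seq 16 8 64))   # 1..8, then 16,24,...,64
    DIMENSIONS=("1024x1024" "2048x2048")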

.github/workflows/run-e2e.yml

@@ -0,0 +1,8 @@
name: run-e2e-test

on:
  workflow_dispatch:

jobs:
  call-e2e:
    uses: ./.github/workflows/_e2e_singlecard.yml
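Note: since run-e2e-test is workflow_dispatch-only, it can be started from the Actions tab or, assuming the GitHub CLI is available on the caller's machine, with:

    gh workflow run run-e2e-test --ref main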

ci/scripts/common/env.sh

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
# static configuration
export DOCKER_NAME="${DOCKER_NAME:-aiak-e2e-singlecard}"
export IMAGE_NAME="${IMAGE_NAME:-iregistry.baidu-int.com/xmlir/xmlir_ubuntu_2004_x86_64:v0.32}"
export CONDA_ENV="${CONDA_ENV:-python310_torch25_cuda}"
export VLLM_HOST="${VLLM_HOST:-0.0.0.0}"
export VLLM_PORT="${VLLM_PORT:-8356}"
export VLLM_API_BASE="http://127.0.0.1:${VLLM_PORT}"
export MODEL_PATH="${MODEL_PATH:-/ssd3/models/Qwen3-8B}"
export SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-Qwen3-8B}"
export XPU_VISIBLE_DEVICES="${XPU_VISIBLE_DEVICES:-5}"
# Proxy Configuration
export PROXY_URL="${PROXY_URL:-http://agent.baidu.com:8891}"
export NO_PROXY_LIST="${NO_PROXY_LIST:-localhost,127.0.0.1,::1}"
export WORKSPACE_MOUNT="${WORKSPACE_MOUNT:-/home/E2E/workspace:/workspace}"
# Log Path
export VLLM_LOG="${VLLM_LOG:-/workspace/vllm.log}"
export ACC_LOG="${ACC_LOG:-/workspace/evalscope_accuracy_report.log}"
export PERF_LOG="${PERF_LOG:-/workspace/benchmark_performance_report.log}"
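Note: every variable here uses the ${VAR:-default} pattern, so a run can be repointed without editing the file. For example (the model path and port are illustrative):

    MODEL_PATH=/models/Qwen3-32B SERVED_MODEL_NAME=Qwen3-32B VLLM_PORT=9000 \
      bash ci/scripts/server/start_vllm.sh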

ci/scripts/common/log.sh

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
log() {
echo "[CI][$(date '+%Y-%m-%d %H:%M:%S')] $*"
}
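Note: a minimal usage sketch of the log helper (the timestamp shown is an example):

    source ci/scripts/common/log.sh
    log "Starting step"   # prints: [CI][2026-01-28 19:30:55] Starting step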

ci/scripts/docker/set_permissions.sh

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
docker exec "${DOCKER_NAME}" bash -lc "
set -e
conda activate ${CONDA_ENV}
chmod -R 777 /workspace
"

ci/scripts/docker/start_docker.sh

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Starting docker container: ${DOCKER_NAME}"
if docker ps -a --format '{{.Names}}' | grep -q "^${DOCKER_NAME}$"; then
  log "Container exists, removing first..."
  docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true
  docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true
fi
HOST_CUDA_LIB_PATH=""
for path in "/usr/local/cuda/lib64" /usr/local/cuda-*/lib64; do
  if [ -d "$path" ]; then
    HOST_CUDA_LIB_PATH="$path"
    break
  fi
done
if [ -n "${HOST_CUDA_LIB_PATH}" ]; then
  log "Detected host CUDA lib path: ${HOST_CUDA_LIB_PATH}"
else
  log "Host CUDA lib path not found, will use container CUDA"
fi
# NVIDIA device mapping
DEVICE_ARGS=""
if [ -e "/dev/nvidia0" ]; then
  DEVICE_ARGS="--device /dev/nvidia0:/dev/nvidia0"
  for i in $(seq 1 16); do
    if [ -e "/dev/nvidia${i}" ]; then
      DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia${i}:/dev/nvidia${i}"
    fi
  done
  if [ -e "/dev/nvidia-uvm" ]; then
    DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia-uvm:/dev/nvidia-uvm"
  fi
  if [ -e "/dev/nvidia-modeset" ]; then
    DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia-modeset:/dev/nvidia-modeset"
  fi
else
  log "WARNING: /dev/nvidia0 not found, GPU may not be available"
fi
# Mount nvidia-smi
NVIDIA_BIN=""
if [ -f "/usr/bin/nvidia-smi" ]; then
  NVIDIA_BIN="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
  log "Added nvidia-smi mount"
else
  log "WARNING: nvidia-smi not found on host"
fi
# Mount critical NVIDIA libs
NVIDIA_LIBS=""
if [ -d "/usr/lib64" ]; then
  for lib in libnvidia-ml.so libnvidia-ml.so.1; do
    if [ -f "/usr/lib64/${lib}" ]; then
      NVIDIA_LIBS="${NVIDIA_LIBS} -v /usr/lib64/${lib}:/usr/lib64/${lib}"
    fi
  done
fi
# Ensure libcuda symlink on the host
ln -sf /usr/lib64/libcuda.so.1 /usr/lib64/libcuda.so || true
log "docker run ${IMAGE_NAME}"
docker run \
-h "$(hostname)" \
--privileged \
--net=host \
--user=root \
--name="${DOCKER_NAME}" \
-v /home:/home \
-v "${WORKSPACE_MOUNT}" \
-v /ssd2:/ssd2 \
-v /ssd1:/ssd1 \
-v /ssd3:/ssd3 \
-v /dev/shm:/dev/shm \
-v /usr/lib64/libcuda.so.1:/usr/lib64/libcuda.so.1 \
-v /usr/lib64/libcuda.so:/usr/lib64/libcuda.so \
-v /usr/lib64/libnvidia-ptxjitcompiler.so.1:/usr/lib64/libnvidia-ptxjitcompiler.so.1 \
-v /var/run/docker.sock:/var/run/docker.sock \
-w /workspace \
${DEVICE_ARGS} \
${NVIDIA_BIN} \
${NVIDIA_LIBS} \
--shm-size=16G \
-e NVIDIA_VISIBLE_DEVICES=all \
-e NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-itd "${IMAGE_NAME}"
log "Container started. Inject conda activate into bashrc"
docker exec "${DOCKER_NAME}" bash -lc "
echo 'conda activate ${CONDA_ENV}' >> ~/.bashrc
conda env list || true
"

ci/scripts/docker/stop_docker.sh

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Stopping docker container: ${DOCKER_NAME}"
docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true
docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true
log "Cleanup done"

ci/scripts/env/install_env.sh

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
########################################
# Common setup
########################################
log "Using container ${DOCKER_NAME}, conda env ${CONDA_ENV}"
docker exec "${DOCKER_NAME}" bash -lc "
set -e
conda activate ${CONDA_ENV}
########################################
# Proxy setup
########################################
export http_proxy=${PROXY_URL}
export https_proxy=${PROXY_URL}
export NO_PROXY=${NO_PROXY_LIST}
export no_proxy=${NO_PROXY_LIST}
########################################
# 1. Install evalscope
########################################
echo '===== Installing evalscope ====='
pip install evalscope
########################################
# 2. Install vLLM-Kunlun (PR code)
########################################
echo '===== Installing vLLM-Kunlun (PR code) ====='
cd /workspace
git config --global --add safe.directory \"${GITHUB_WORKSPACE}\"
cd \"${GITHUB_WORKSPACE}\"
echo '===== USING PR CODE ====='
git rev-parse HEAD
git log -1 --oneline
# Disable proxy for local build
unset http_proxy
unset https_proxy
cd vLLM-Kunlun
pip install -r requirements.txt
python setup.py build
python setup.py install
# Patch torch dynamo eval_frame
cp vllm_kunlun/patches/eval_frame.py \
/root/miniconda/envs/${CONDA_ENV}/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py
########################################
# Kunlun runtime dependencies
########################################
echo '===== Installing Kunlun runtime dependencies ====='
wget -O xpytorch.run \
\"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7\"
bash xpytorch.run
pip install \
\"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd\"
pip install \
\"https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl\"
pip install \
\"https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl\"
########################################
# Setup Kunlun env
########################################
export NO_PROXY=${NO_PROXY_LIST}
export no_proxy=${NO_PROXY_LIST}
chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
########################################
# 3. Install upstream vLLM 0.11.0
########################################
echo '===== Installing vLLM==0.11.0 ====='
pip uninstall -y vllm || true
env | grep -i proxy || true
pip install vllm==0.11.0 \
--no-build-isolation \
--no-deps \
--index-url https://pip.baidu-int.com/simple/
python -c 'import vllm; print(\"vllm version:\", vllm.__version__)'
echo '===== All installations completed successfully ====='
"

ci/scripts/server/start_vllm.sh

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Starting vLLM server in container ${DOCKER_NAME}"
docker exec -d "${DOCKER_NAME}" bash -lc "
set -e
chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
rm -f ${VLLM_LOG}
export XPU_VISIBLE_DEVICES=${XPU_VISIBLE_DEVICES}
python -u -m vllm.entrypoints.openai.api_server \
--host ${VLLM_HOST} \
--port ${VLLM_PORT} \
--model ${MODEL_PATH} \
--gpu-memory-utilization 0.9 \
--trust-remote-code \
--max-model-len 32768 \
--tensor-parallel-size 1 \
--dtype float16 \
--max-num-seqs 128 \
--max-num-batched-tokens 32768 \
--block-size 128 \
--no-enable-prefix-caching \
--no-enable-chunked-prefill \
--distributed-executor-backend mp \
--served-model-name ${SERVED_MODEL_NAME} \
--compilation-config '{\"splitting_ops\": [\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.unified_attention_with_output_kunlun\",\"vllm.mamba_mixer2\",\"vllm.mamba_mixer\",\"vllm.short_conv\",\"vllm.linear_attention\",\"vllm.plamo2_mamba_mixer\",\"vllm.gdn_attention\",\"vllm.sparse_attn_indexer\"]}' \
2>&1 | tee ${VLLM_LOG}
"
log "vLLM start command issued (running in background)"

ci/scripts/server/wait_vllm.sh

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Waiting for vLLM to be ready: ${VLLM_API_BASE}/v1/models"
docker exec "${DOCKER_NAME}" bash -lc "
set -e
for i in {1..90}; do
if curl -sf ${VLLM_API_BASE}/v1/models >/dev/null; then
echo 'vLLM is ready'
tail -n 500 ${VLLM_LOG} || true
exit 0
fi
sleep 5
done
echo 'vLLM start failed'
echo '==== last 500 lines of vllm.log ===='
tail -n 500 ${VLLM_LOG} || true
exit 1
"

ci/scripts/tests/run_accuracy.sh

@@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Running accuracy test via evalscope"
docker exec "${DOCKER_NAME}" bash -lc "
set -e
rm -f ${ACC_LOG}
export http_proxy=${PROXY_URL}
export https_proxy=${PROXY_URL}
export NO_PROXY=${NO_PROXY_LIST}
export no_proxy=${NO_PROXY_LIST}
evalscope eval \
--model ${SERVED_MODEL_NAME} \
--api-url http://localhost:${VLLM_PORT}/v1 \
--datasets gsm8k arc \
--limit 10 2>&1 | tee ${ACC_LOG}
"

ci/scripts/tests/run_performance.sh

@@ -0,0 +1,80 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Running performance test via bench"
# NOTE: single-quote the payload so variables and arrays expand
# inside the container rather than on the host.
docker exec "${DOCKER_NAME}" bash -lc '
  set -e
  source /root/miniconda/etc/profile.d/conda.sh
  source ci/scripts/common/env.sh
  source ci/scripts/common/log.sh
  conda activate "${CONDA_ENV}"
  # ==========================================
  # 1. Define test dimensions (input_len x output_len);
  #    extend by adding entries, e.g. "2048x2048"
  # ==========================================
  DIMENSIONS=("1024x1024")
  # ==========================================
  # 2. Define concurrency levels; extend with seq
  #    (seq [start] [step] [end]) for denser ranges
  # ==========================================
  CONCURRENCIES=(1)
  # ==========================================
  # 3. Assemble test cases: outer loop over concurrency
  #    (batch size), inner loop over dimensions
  # ==========================================
  TEST_COMBINATIONS=()
  for bs in "${CONCURRENCIES[@]}"; do
    for dim in "${DIMENSIONS[@]}"; do
      TEST_COMBINATIONS+=("${bs}x${dim}")
    done
  done
  # ==========================================
  # 4. Print generated cases for a sanity check
  # ==========================================
  echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
  echo "${TEST_COMBINATIONS[@]}"
  # Progress counters
  TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
  CURRENT_TEST=0
  # Iterate over all test combinations
  for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
    # Parse "concurrency x input_len x output_len"
    NUM_PROMPTS=$(echo "$COMBINATION" | cut -dx -f1)
    INPUT_LEN=$(echo "$COMBINATION" | cut -dx -f2)
    OUTPUT_LEN=$(echo "$COMBINATION" | cut -dx -f3)
    # Update progress
    CURRENT_TEST=$((CURRENT_TEST + 1))
    echo "=========================================================="
    echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
    echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
    echo "=========================================================="
    #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
    # Run benchmark
    python3 -m vllm.entrypoints.cli.main bench serve \
      --host 127.0.0.1 \
      --port ${VLLM_PORT} \
      --backend vllm \
      --model ${SERVED_MODEL_NAME} \
      --dataset-name random \
      --num-prompts $NUM_PROMPTS \
      --random-input-len $INPUT_LEN \
      --random-output-len $OUTPUT_LEN \
      --tokenizer ${MODEL_PATH} \
      --ignore-eos
  done
'