diff --git a/.github/workflows/_e2e_singlecard.yml b/.github/workflows/_e2e_singlecard.yml new file mode 100644 index 0000000..2facfa4 --- /dev/null +++ b/.github/workflows/_e2e_singlecard.yml @@ -0,0 +1,141 @@ +name: e2e-test + +on: + workflow_call: + pull_request: + branches: [main] + types: [opened, synchronize, reopened] + push: + branches: [main] + +concurrency: + group: e2e-singlecard + cancel-in-progress: false + +jobs: + e2e: + name: e2e-test-singlecard + runs-on: + - self-hosted + - Linux + - X64 + + steps: + - name: Checkout PR code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Verify PR workspace + run: | + echo "===== WORKSPACE =====" + pwd + ls -l + echo "===== GIT INFO =====" + git rev-parse HEAD + git log -1 --oneline + git status --porcelain + + - name: Start docker + run: | + bash ci/scripts/docker/start_docker.sh + + - name: Install enviroments + run: | + bash ci/scripts/env/install_env.sh + + - name: Start vLLM server + run: | + bash ci/scripts/server/start_vllm.sh + + - name: Wait for vLLM ready + run: | + bash ci/scripts/server/wait_vllm.sh + + - name: Accuracy testing + run: | + bash ci/scripts/tests/run_accuracy.sh + + - name: Performance testing + run: | + docker exec aiak-e2e-singlecard bash -lc ' + source ci/scripts/common/env.sh + source ci/scripts/common/log.sh + #!/bin/bash + # ========================================== + # 1. Define test dimensions + # (can be easily extended, e.g., add "2048x2048") + # ========================================== + DIMENSIONS=("1024x1024") + + # ========================================== + # 2. Define concurrency generation logic (densification strategy) + # ============x============================== + # Use array concatenation to combine different density ranges + # Syntax: seq [start] [step] [end] + CONCURRENCIES=(1) + + # ========================================== + # 3. 
Automatically assemble test cases + # ========================================== + TEST_COMBINATIONS=() # Initialize empty array + + # 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions + for bs in "${CONCURRENCIES[@]}"; do # ← outer loop: concurrency + for dim in "${DIMENSIONS[@]}"; do # ← inner loop: dimensions + case_str="${bs}x${dim}" + TEST_COMBINATIONS+=("$case_str") + done + done + + # ========================================== + # 4. (Optional) Print generated cases for sanity check + # ========================================== + echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:" + echo "${TEST_COMBINATIONS[@]}" # Uncomment if you want to print all cases + + # Progress counters + TOTAL_TESTS=${#TEST_COMBINATIONS[@]} + CURRENT_TEST=0 + + # Iterate over all test combinations + for COMBINATION in "${TEST_COMBINATIONS[@]}"; do + # Parse parameters from combination string + NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1) + INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2) + OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3) + + # Update progress + CURRENT_TEST=$((CURRENT_TEST + 1)) + + echo "==========================================================" + echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS" + echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN" + echo "==========================================================" + + #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log" + + # Run benchmark + python3 -m vllm.entrypoints.cli.main bench serve \ + --host 127.0.0.1 \ + --port ${VLLM_PORT:-8356}\ + --backend vllm \ + --model ${SERVED_MODEL_NAME:-Qwen3-8B} \ + --dataset-name random \ + --num-prompts $NUM_PROMPTS \ + --random-input-len $INPUT_LEN \ + --random-output-len $OUTPUT_LEN \ + --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \ + --ignore-eos + done + ' + + - name: Set permissions + if: always() + run: | + bash 
ci/scripts/docker/set_permissions.sh + + - name: Cleanup docker + if: always() + run: | + bash ci/scripts/docker/stop_docker.sh diff --git a/.github/workflows/run-e2e.yml b/.github/workflows/run-e2e.yml new file mode 100644 index 0000000..91ffc66 --- /dev/null +++ b/.github/workflows/run-e2e.yml @@ -0,0 +1,8 @@ +name: run-e2e-test + +on: + workflow_dispatch: + +jobs: + call-e2e: + uses: ./.github/workflows/_e2e_singlecard.yml diff --git a/ci/scripts/common/env.sh b/ci/scripts/common/env.sh new file mode 100755 index 0000000..209a95c --- /dev/null +++ b/ci/scripts/common/env.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -euo pipefail + +# static configuration +export DOCKER_NAME="${DOCKER_NAME:-aiak-e2e-singlecard}" +export IMAGE_NAME="${IMAGE_NAME:-iregistry.baidu-int.com/xmlir/xmlir_ubuntu_2004_x86_64:v0.32}" + +export CONDA_ENV="${CONDA_ENV:-python310_torch25_cuda}" + +export VLLM_HOST="${VLLM_HOST:-0.0.0.0}" +export VLLM_PORT="${VLLM_PORT:-8356}" +export VLLM_API_BASE="http://127.0.0.1:${VLLM_PORT}" + +export MODEL_PATH="${MODEL_PATH:-/ssd3/models/Qwen3-8B}" +export SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-Qwen3-8B}" + +export XPU_VISIBLE_DEVICES="${XPU_VISIBLE_DEVICES:-5}" + +# Proxy Configuration +export PROXY_URL="${PROXY_URL:-http://agent.baidu.com:8891}" +export NO_PROXY_LIST="${NO_PROXY_LIST:-localhost,127.0.0.1,::1}" + +export WORKSPACE_MOUNT="${WORKSPACE_MOUNT:-/home/E2E/workspace:/workspace}" + +# Log Path +export VLLM_LOG="${VLLM_LOG:-/workspace/vllm.log}" +export ACC_LOG="${ACC_LOG:-/workspace/evalscope_accuracy_report.log}" +export PERF_LOG="${PERF_LOG:-/workspace/benchmark_performance_report.log}" diff --git a/ci/scripts/common/log.sh b/ci/scripts/common/log.sh new file mode 100755 index 0000000..ed7f553 --- /dev/null +++ b/ci/scripts/common/log.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +log() { + echo "[CI][$(date '+%Y-%m-%d %H:%M:%S')] $*" +} diff --git a/ci/scripts/docker/set_permissions.sh b/ci/scripts/docker/set_permissions.sh 
new file mode 100644 index 0000000..900abed --- /dev/null +++ b/ci/scripts/docker/set_permissions.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +docker exec "${DOCKER_NAME}" bash -lc " + set -e + conda activate ${CONDA_ENV} + chmod -R 777 /workspace +" \ No newline at end of file diff --git a/ci/scripts/docker/start_docker.sh b/ci/scripts/docker/start_docker.sh new file mode 100755 index 0000000..0910c9c --- /dev/null +++ b/ci/scripts/docker/start_docker.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +log "Starting docker container: ${DOCKER_NAME}" + +if docker ps -a --format '{{.Names}}' | grep -q "^${DOCKER_NAME}$"; then + log "Container exists, removing first..." + docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true + docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true +fi + +HOST_CUDA_LIB_PATH="" +for path in "/usr/local/cuda/lib64" /usr/local/cuda-*/lib64; do + if [ -d "$path" ]; then + HOST_CUDA_LIB_PATH="$path" + break + fi +done + +if [ -n "${HOST_CUDA_LIB_PATH}" ]; then + log "Detected host CUDA lib path: ${HOST_CUDA_LIB_PATH}" +else + log "Host CUDA lib path not found, will use container CUDA" +fi + +# NVIDIA device mapping +DEVICE_ARGS="" +if [ -e "/dev/nvidia0" ]; then + DEVICE_ARGS="--device /dev/nvidia0:/dev/nvidia0" + for i in $(seq 1 16); do + if [ -e "/dev/nvidia${i}" ]; then + DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia${i}:/dev/nvidia${i}" + fi + done + if [ -e "/dev/nvidia-uvm" ]; then + DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia-uvm:/dev/nvidia-uvm" + fi + if [ -e "/dev/nvidia-modeset" ]; then + DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia-modeset:/dev/nvidia-modeset" + fi +else + log "WARNING: /dev/nvidia0 not found, GPU may not be available" +fi + +# Mount nvidia-smi +NVIDIA_BIN="" +if [ -f "/usr/bin/nvidia-smi" ]; then + NVIDIA_BIN="-v 
/usr/bin/nvidia-smi:/usr/bin/nvidia-smi" + log "Added nvidia-smi mount" +else + log "WARNING: nvidia-smi not found on host" +fi + +# Mount critical NVIDIA libs +NVIDIA_LIBS="" +if [ -d "/usr/lib64" ]; then + for lib in libnvidia-ml.so libnvidia-ml.so.1; do + if [ -f "/usr/lib64/${lib}" ]; then + NVIDIA_LIBS="${NVIDIA_LIBS} -v /usr/lib64/${lib}:/usr/lib64/${lib}" + fi + done +fi + +# Ensure libcuda symlink +ln -sf /usr/lib64/libcuda.so.1 /usr/lib64/libcuda.so || true + +log "docker run ${IMAGE_NAME}" +docker run \ + -h "$(hostname)" \ + --privileged \ + --net=host \ + --user=root \ + --name="${DOCKER_NAME}" \ + -v /home:/home \ + -v "${WORKSPACE_MOUNT}" \ + -v /ssd2:/ssd2 \ + -v /ssd1:/ssd1 \ + -v /ssd3:/ssd3 \ + -v /dev/shm:/dev/shm \ + -v /usr/lib64/libcuda.so.1:/usr/lib64/libcuda.so.1 \ + -v /usr/lib64/libcuda.so:/usr/lib64/libcuda.so \ + -v /usr/lib64/libnvidia-ml.so.1:/usr/lib64/libnvidia-ml.so.1 \ + -v /usr/lib64/libnvidia-ptxjitcompiler.so.1:/usr/lib64/libnvidia-ptxjitcompiler.so.1 2>/dev/null \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -w /workspace \ + ${DEVICE_ARGS} \ + ${NVIDIA_BIN} \ + ${NVIDIA_LIBS} \ + --shm-size=16G \ + -e NVIDIA_VISIBLE_DEVICES=all \ + -e NVIDIA_DRIVER_CAPABILITIES=compute,utility \ + -itd "${IMAGE_NAME}" + +log "Container started. 
Inject conda activate into bashrc" +docker exec "${DOCKER_NAME}" bash -lc " + echo 'conda activate ${CONDA_ENV}' >> ~/.bashrc + conda env list || true +" diff --git a/ci/scripts/docker/stop_docker.sh b/ci/scripts/docker/stop_docker.sh new file mode 100755 index 0000000..43d824d --- /dev/null +++ b/ci/scripts/docker/stop_docker.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + + +log "Stopping docker container: ${DOCKER_NAME}" +docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true +docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true +log "Cleanup done" diff --git a/ci/scripts/env/install_env.sh b/ci/scripts/env/install_env.sh new file mode 100644 index 0000000..bacff9b --- /dev/null +++ b/ci/scripts/env/install_env.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +######################################## +# Common setup +######################################## +log "Using container ${DOCKER_NAME}, conda env ${CONDA_ENV}" + +docker exec "${DOCKER_NAME}" bash -lc " + set -e + conda activate ${CONDA_ENV} + + ######################################## + # Proxy setup + ######################################## + export http_proxy=${PROXY_URL} + export https_proxy=${PROXY_URL} + export NO_PROXY=${NO_PROXY_LIST} + export no_proxy=${NO_PROXY_LIST} + + ######################################## + # 1. Install evalscope + ######################################## + echo '===== Installing evalscope =====' + pip install evalscope + + ######################################## + # 2. 
Install vLLM-Kunlun (PR code) + ######################################## + echo '===== Installing vLLM-Kunlun (PR code) =====' + + cd /workspace + + git config --global --add safe.directory \"${GITHUB_WORKSPACE}\" + + cd \"${GITHUB_WORKSPACE}\" + echo '===== USING PR CODE =====' + git rev-parse HEAD + git log -1 --oneline + + # Disable proxy for local build + unset http_proxy + unset https_proxy + + cd vLLM-Kunlun + + pip install -r requirements.txt + python setup.py build + python setup.py install + + # Patch torch dynamo eval_frame + cp vllm_kunlun/patches/eval_frame.py \ + /root/miniconda/envs/${CONDA_ENV}/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py + + ######################################## + # Kunlun runtime dependencies + ######################################## + echo '===== Installing Kunlun runtime dependencies =====' + + wget -O xpytorch.run \ + \"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7\" + bash xpytorch.run + + pip install \ + \"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd\" + + pip install \ + \"https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl\" + + pip install \ + \"https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl\" + + ######################################## + # Setup Kunlun env + ######################################## + export NO_PROXY=${NO_PROXY_LIST} + export no_proxy=${NO_PROXY_LIST} + + chmod +x 
\"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\" + source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\" + + ######################################## + # 3. Install upstream vLLM 0.11.0 + ######################################## + echo '===== Installing vLLM==0.11.0 =====' + + pip uninstall -y vllm || true + env | grep -i proxy || true + + pip install vllm==0.11.0 \ + --no-build-isolation \ + --no-deps \ + --index-url https://pip.baidu-int.com/simple/ + + python -c 'import vllm; print(\"vllm version:\", vllm.__version__)' + + echo '===== All installations completed successfully =====' +" diff --git a/ci/scripts/server/start_vllm.sh b/ci/scripts/server/start_vllm.sh new file mode 100755 index 0000000..c24fc18 --- /dev/null +++ b/ci/scripts/server/start_vllm.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +log "Starting vLLM server in container ${DOCKER_NAME}" + +docker exec -d "${DOCKER_NAME}" bash -lc " + set -e + + chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\" + source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\" + + rm -f ${VLLM_LOG} + export XPU_VISIBLE_DEVICES=${XPU_VISIBLE_DEVICES} + + python -u -m vllm.entrypoints.openai.api_server \ + --host ${VLLM_HOST} \ + --port ${VLLM_PORT} \ + --model ${MODEL_PATH} \ + --gpu-memory-utilization 0.9 \ + --trust-remote-code \ + --max-model-len 32768 \ + --tensor-parallel-size 1 \ + --dtype float16 \ + --max_num_seqs 128 \ + --max_num_batched_tokens 32768 \ + --block-size 128 \ + --no-enable-prefix-caching \ + --no-enable-chunked-prefill \ + --distributed-executor-backend mp \ + --served-model-name ${SERVED_MODEL_NAME} \ + --compilation-config '{\"splitting_ops\": 
[\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.unified_attention_with_output_kunlun\",\"vllm.mamba_mixer2\",\"vllm.mamba_mixer\",\"vllm.short_conv\",\"vllm.linear_attention\",\"vllm.plamo2_mamba_mixer\",\"vllm.gdn_attention\",\"vllm.sparse_attn_indexer\"]}' \ + 2>&1 | tee ${VLLM_LOG} +" + +log "vLLM start command issued (running in background)" diff --git a/ci/scripts/server/wait_vllm.sh b/ci/scripts/server/wait_vllm.sh new file mode 100755 index 0000000..d1ca7bc --- /dev/null +++ b/ci/scripts/server/wait_vllm.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +log "Waiting for vLLM to be ready: ${VLLM_API_BASE}/v1/models" + +docker exec "${DOCKER_NAME}" bash -lc " + set -e + + for i in {1..90}; do + if curl -sf ${VLLM_API_BASE}/v1/models >/dev/null; then + echo 'vLLM is ready' + tail -n 500 ${VLLM_LOG} || true + exit 0 + fi + sleep 5 + done + + echo 'vLLM start failed' + echo '==== last 500 lines of vllm.log ====' + tail -n 500 ${VLLM_LOG} || true + exit 1 +" diff --git a/ci/scripts/tests/run_accuracy.sh b/ci/scripts/tests/run_accuracy.sh new file mode 100755 index 0000000..5123712 --- /dev/null +++ b/ci/scripts/tests/run_accuracy.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +log "Running accuracy test via evalscope" + +docker exec "${DOCKER_NAME}" bash -lc " + set -e + rm -f ${ACC_LOG} + + export http_proxy=${PROXY_URL} + export https_proxy=${PROXY_URL} + export NO_PROXY=${NO_PROXY_LIST} + export no_proxy=${NO_PROXY_LIST} + + evalscope eval \ + --model ${SERVED_MODEL_NAME} \ + --api-url http://localhost:${VLLM_PORT}/v1 \ + --datasets gsm8k arc \ + --limit 10 2>&1 | tee ${ACC_LOG} +" diff --git a/ci/scripts/tests/run_performance.sh b/ci/scripts/tests/run_performance.sh new file mode 100755 index 0000000..8a56411 --- /dev/null +++ b/ci/scripts/tests/run_performance.sh @@ -0,0 
+1,80 @@ +#!/usr/bin/env bash +set -eo pipefail +bs="" +source ci/scripts/common/env.sh +source ci/scripts/common/log.sh + +log "Running performance test via bench" + +docker exec "${DOCKER_NAME}" bash -lc " + + source /root/miniconda/etc/profile.d/conda.sh + conda activate ${CONDA_ENV} + # NOTE: \$ and \" below are escaped so they expand inside the container, not on the host + # ========================================== + # 1. Define test dimensions + # (can be easily extended, e.g., add \"2048x2048\") + # ========================================== + DIMENSIONS=(\"1024x1024\") + + # ========================================== + # 2. Define concurrency generation logic (densification strategy) + # ========================================== + # Use array concatenation to combine different density ranges + # Syntax: seq [start] [step] [end] + CONCURRENCIES=(1) + + # ========================================== + # 3. Automatically assemble test cases + # ========================================== + TEST_COMBINATIONS=() # Initialize empty array + + # Outer loop over batch size (concurrency), inner loop over dimensions + for bs in \"\${CONCURRENCIES[@]}\"; do + for dim in \"\${DIMENSIONS[@]}\"; do + case_str=\"\${bs}x\${dim}\" + TEST_COMBINATIONS+=(\"\$case_str\") + done + done + + # ========================================== + # 4. 
(Optional) Print generated cases for sanity check + # ========================================== + echo \"Generated \${#TEST_COMBINATIONS[@]} test cases in total:\" + echo \"\${TEST_COMBINATIONS[@]}\" # Uncomment if you want to print all cases + + # Progress counters + TOTAL_TESTS=\${#TEST_COMBINATIONS[@]} + CURRENT_TEST=0 + + # Iterate over all test combinations + for COMBINATION in \"\${TEST_COMBINATIONS[@]}\"; do + # Parse parameters from combination string + NUM_PROMPTS=\$(echo \$COMBINATION | cut -d'x' -f1) + INPUT_LEN=\$(echo \$COMBINATION | cut -d'x' -f2) + OUTPUT_LEN=\$(echo \$COMBINATION | cut -d'x' -f3) + + # Update progress + CURRENT_TEST=\$((CURRENT_TEST + 1)) + + echo \"==========================================================\" + echo \"Test progress: \$CURRENT_TEST / \$TOTAL_TESTS\" + echo \"Current configuration: concurrency=\$NUM_PROMPTS, input_len=\$INPUT_LEN, output_len=\$OUTPUT_LEN\" + echo \"==========================================================\" + + #OUTPUT_FILE=\"\$RESULT_DIR/p800_\${NUM_PROMPTS}_\${INPUT_LEN}_\${OUTPUT_LEN}.log\" + + # Run benchmark + python3 -m vllm.entrypoints.cli.main bench serve \ + --host 127.0.0.1 \ + --port ${VLLM_PORT} \ + --backend vllm \ + --model ${SERVED_MODEL_NAME} \ + --dataset-name random \ + --num-prompts \$NUM_PROMPTS \ + --random-input-len \$INPUT_LEN \ + --random-output-len \$OUTPUT_LEN \ + --tokenizer ${MODEL_PATH} \ + --ignore-eos + done +"