[CI/Build] Add CI end-to-end (E2E) tests (#139)
* [CI/Build] Add CI end-to-end (E2E) tests

Signed-off-by: Chenchao Hu <huchenchao@example.com>
ci/scripts/server/start_vllm.sh (new executable file, 38 lines)
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail

source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

log "Starting vLLM server in container ${DOCKER_NAME}"

docker exec -d "${DOCKER_NAME}" bash -lc "
    set -e

    chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
    source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"

    rm -f ${VLLM_LOG}
    export XPU_VISIBLE_DEVICES=${XPU_VISIBLE_DEVICES}

    python -u -m vllm.entrypoints.openai.api_server \
        --host ${VLLM_HOST} \
        --port ${VLLM_PORT} \
        --model ${MODEL_PATH} \
        --gpu-memory-utilization 0.9 \
        --trust-remote-code \
        --max-model-len 32768 \
        --tensor-parallel-size 1 \
        --dtype float16 \
        --max-num-seqs 128 \
        --max-num-batched-tokens 32768 \
        --block-size 128 \
        --no-enable-prefix-caching \
        --no-enable-chunked-prefill \
        --distributed-executor-backend mp \
        --served-model-name ${SERVED_MODEL_NAME} \
        --compilation-config '{\"splitting_ops\": [\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.unified_attention_with_output_kunlun\",\"vllm.mamba_mixer2\",\"vllm.mamba_mixer\",\"vllm.short_conv\",\"vllm.linear_attention\",\"vllm.plamo2_mamba_mixer\",\"vllm.gdn_attention\",\"vllm.sparse_attn_indexer\"]}' \
        2>&1 | tee ${VLLM_LOG}
"

log "vLLM start command issued (running in background)"
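Both scripts source ci/scripts/common/env.sh and ci/scripts/common/log.sh, which are not part of this diff. A minimal sketch of what they might provide, assuming only the variable and function names used above; every concrete value here is an illustrative placeholder, not the repository's actual configuration:

#!/usr/bin/env bash
# ci/scripts/common/env.sh (sketch) -- all default values are placeholders.
export DOCKER_NAME="${DOCKER_NAME:-vllm-ci}"              # container the CI job targets
export VLLM_HOST="${VLLM_HOST:-127.0.0.1}"
export VLLM_PORT="${VLLM_PORT:-8000}"
export VLLM_API_BASE="http://${VLLM_HOST}:${VLLM_PORT}"
export VLLM_LOG="${VLLM_LOG:-/tmp/vllm.log}"
export MODEL_PATH="${MODEL_PATH:-/models/placeholder-model}"
export SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-placeholder-model}"
export XPU_VISIBLE_DEVICES="${XPU_VISIBLE_DEVICES:-0}"

# ci/scripts/common/log.sh (sketch) -- a timestamped logger.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}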
ci/scripts/server/wait_vllm.sh (new executable file, 25 lines)
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail

source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

log "Waiting for vLLM to be ready: ${VLLM_API_BASE}/v1/models"

docker exec "${DOCKER_NAME}" bash -lc "
    set -e

    # Poll the OpenAI-compatible models endpoint: 90 attempts * 5 s = 7.5 min timeout.
    for i in {1..90}; do
        if curl -sf ${VLLM_API_BASE}/v1/models >/dev/null; then
            echo 'vLLM is ready'
            tail -n 500 ${VLLM_LOG} || true
            exit 0
        fi
        sleep 5
    done

    echo 'vLLM start failed'
    echo '==== last 500 lines of vllm.log ===='
    tail -n 500 ${VLLM_LOG} || true
    exit 1
"
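Once wait_vllm.sh reports ready, the E2E test can exercise the server through vLLM's standard OpenAI-compatible API. An illustrative smoke-test request against the server started above; the prompt and sampling parameters are arbitrary, not part of this change:

# Illustrative smoke test using the variables from env.sh.
curl -sf "${VLLM_API_BASE}/v1/chat/completions" \
    -H 'Content-Type: application/json' \
    -d "{
        \"model\": \"${SERVED_MODEL_NAME}\",
        \"messages\": [{\"role\": \"user\", \"content\": \"Say hello.\"}],
        \"max_tokens\": 32
    }"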
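In the CI workflow these scripts would typically run in sequence from the repository root, with start_vllm.sh launching the server in the background and wait_vllm.sh blocking until it answers. A sketch of the chaining; the exact workflow step that invokes them is not shown in this diff:

# Hypothetical CI step: start the server, then block until it is reachable.
bash ci/scripts/server/start_vllm.sh
bash ci/scripts/server/wait_vllm.sh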