[CI/Build] Add CI end-to-end (E2E) tests (#139)

* [CI/Build] Add CI end-to-end (E2E) tests
Signed-off-by: Chenchao Hu <huchenchao@example.com>
This commit is contained in:
1916hcc
2026-01-28 19:30:55 +08:00
committed by GitHub
parent c37ee19e3d
commit 7c2966a98c
12 changed files with 573 additions and 0 deletions

38
ci/scripts/server/start_vllm.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Launch the vLLM OpenAI-compatible API server inside the CI container.
#
# Required env (expected from ci/scripts/common/env.sh):
#   DOCKER_NAME, GITHUB_WORKSPACE, VLLM_LOG, XPU_VISIBLE_DEVICES,
#   VLLM_HOST, VLLM_PORT, MODEL_PATH, SERVED_MODEL_NAME
#
# The server is started detached (docker exec -d); readiness is checked
# separately by ci/scripts/server/wait_vllm.sh.
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

# Fail fast with a clear message if env.sh did not provide what we need —
# otherwise the detached container command would fail silently.
: "${DOCKER_NAME:?DOCKER_NAME must be set}"
: "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must be set}"
: "${VLLM_LOG:?VLLM_LOG must be set}"
: "${MODEL_PATH:?MODEL_PATH must be set}"

log "Starting vLLM server in container ${DOCKER_NAME}"
# NOTE: all ${...} below are expanded by the *outer* shell before the string
# is handed to the container; the escaped \" pairs quote values (log path,
# model path, served name) that could contain spaces for the *inner* shell.
docker exec -d "${DOCKER_NAME}" bash -lc "
set -e
chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
rm -f \"${VLLM_LOG}\"
export XPU_VISIBLE_DEVICES=${XPU_VISIBLE_DEVICES}
python -u -m vllm.entrypoints.openai.api_server \
  --host ${VLLM_HOST} \
  --port ${VLLM_PORT} \
  --model \"${MODEL_PATH}\" \
  --gpu-memory-utilization 0.9 \
  --trust-remote-code \
  --max-model-len 32768 \
  --tensor-parallel-size 1 \
  --dtype float16 \
  --max-num-seqs 128 \
  --max-num-batched-tokens 32768 \
  --block-size 128 \
  --no-enable-prefix-caching \
  --no-enable-chunked-prefill \
  --distributed-executor-backend mp \
  --served-model-name \"${SERVED_MODEL_NAME}\" \
  --compilation-config '{\"splitting_ops\": [\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.unified_attention_with_output_kunlun\",\"vllm.mamba_mixer2\",\"vllm.mamba_mixer\",\"vllm.short_conv\",\"vllm.linear_attention\",\"vllm.plamo2_mamba_mixer\",\"vllm.gdn_attention\",\"vllm.sparse_attn_indexer\"]}' \
  2>&1 | tee \"${VLLM_LOG}\"
"
log "vLLM start command issued (running in background)"

25
ci/scripts/server/wait_vllm.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Poll the vLLM OpenAI-compatible endpoint until it answers, or time out.
#
# Required env (expected from ci/scripts/common/env.sh):
#   DOCKER_NAME, VLLM_API_BASE, VLLM_LOG
# Optional overrides:
#   VLLM_WAIT_RETRIES  - number of poll attempts (default 90)
#   VLLM_WAIT_INTERVAL - seconds between attempts (default 5)
#
# Exits 0 once ${VLLM_API_BASE}/v1/models responds; exits 1 (with the log
# tail) after the retry budget is exhausted.
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

: "${DOCKER_NAME:?DOCKER_NAME must be set}"
: "${VLLM_API_BASE:?VLLM_API_BASE must be set}"
: "${VLLM_LOG:?VLLM_LOG must be set}"

# Defaults preserve the original 90 x 5s = 450s budget but let CI override.
retries=${VLLM_WAIT_RETRIES:-90}
interval=${VLLM_WAIT_INTERVAL:-5}

log "Waiting for vLLM to be ready: ${VLLM_API_BASE}/v1/models"
# ${retries}/${interval}/${VLLM_LOG} are expanded by the outer shell; the
# inner shell runs a C-style for loop (brace expansion {1..N} cannot take a
# variable, so the parameterized count needs this form).
docker exec "${DOCKER_NAME}" bash -lc "
set -e
for (( attempt = 1; attempt <= ${retries}; attempt++ )); do
  # -f makes curl fail on HTTP errors, so a 5xx during startup keeps polling.
  if curl -sf ${VLLM_API_BASE}/v1/models >/dev/null; then
    echo 'vLLM is ready'
    tail -n 500 \"${VLLM_LOG}\" || true
    exit 0
  fi
  sleep ${interval}
done
echo 'vLLM start failed' >&2
echo '==== last 500 lines of ${VLLM_LOG} ====' >&2
tail -n 500 \"${VLLM_LOG}\" >&2 || true
exit 1
"