[CI/Build] Add CI end-to-end (E2E) tests (#139)
* [CI/Build] Add CI end-to-end (E2E) tests

Signed-off-by: Chenchao Hu <huchenchao@example.com>
ci/scripts/server/start_vllm.sh (new executable file, 38 lines)
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail

source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

log "Starting vLLM server in container ${DOCKER_NAME}"

docker exec -d "${DOCKER_NAME}" bash -lc "
    set -e

    chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
    source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"

    rm -f ${VLLM_LOG}
    export XPU_VISIBLE_DEVICES=${XPU_VISIBLE_DEVICES}

    python -u -m vllm.entrypoints.openai.api_server \
        --host ${VLLM_HOST} \
        --port ${VLLM_PORT} \
        --model ${MODEL_PATH} \
        --gpu-memory-utilization 0.9 \
        --trust-remote-code \
        --max-model-len 32768 \
        --tensor-parallel-size 1 \
        --dtype float16 \
        --max-num-seqs 128 \
        --max-num-batched-tokens 32768 \
        --block-size 128 \
        --no-enable-prefix-caching \
        --no-enable-chunked-prefill \
        --distributed-executor-backend mp \
        --served-model-name ${SERVED_MODEL_NAME} \
        --compilation-config '{\"splitting_ops\": [\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.unified_attention_with_output_kunlun\",\"vllm.mamba_mixer2\",\"vllm.mamba_mixer\",\"vllm.short_conv\",\"vllm.linear_attention\",\"vllm.plamo2_mamba_mixer\",\"vllm.gdn_attention\",\"vllm.sparse_attn_indexer\"]}' \
        2>&1 | tee ${VLLM_LOG}
"

log "vLLM start command issued (running in background)"
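Both scripts source ci/scripts/common/env.sh and ci/scripts/common/log.sh, which are not part of this diff. A minimal sketch of what they might provide, assuming only the variable and function names used above; every concrete value here is an illustrative placeholder, not the repository's actual configuration:

#!/usr/bin/env bash
# ci/scripts/common/env.sh (sketch) -- all default values are placeholders.
export DOCKER_NAME="${DOCKER_NAME:-vllm-ci}"              # container the CI job targets
export VLLM_HOST="${VLLM_HOST:-127.0.0.1}"
export VLLM_PORT="${VLLM_PORT:-8000}"
export VLLM_API_BASE="http://${VLLM_HOST}:${VLLM_PORT}"
export VLLM_LOG="${VLLM_LOG:-/tmp/vllm.log}"
export MODEL_PATH="${MODEL_PATH:-/models/placeholder-model}"
export SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-placeholder-model}"
export XPU_VISIBLE_DEVICES="${XPU_VISIBLE_DEVICES:-0}"

# ci/scripts/common/log.sh (sketch) -- a timestamped logger.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}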
ci/scripts/server/wait_vllm.sh (new executable file, 25 lines)
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail

source ci/scripts/common/env.sh
source ci/scripts/common/log.sh

log "Waiting for vLLM to be ready: ${VLLM_API_BASE}/v1/models"

docker exec "${DOCKER_NAME}" bash -lc "
    set -e

    # Poll the OpenAI-compatible models endpoint: 90 attempts * 5 s = 7.5 min timeout.
    for i in {1..90}; do
        if curl -sf ${VLLM_API_BASE}/v1/models >/dev/null; then
            echo 'vLLM is ready'
            tail -n 500 ${VLLM_LOG} || true
            exit 0
        fi
        sleep 5
    done

    echo 'vLLM start failed'
    echo '==== last 500 lines of vllm.log ===='
    tail -n 500 ${VLLM_LOG} || true
    exit 1
"
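Once wait_vllm.sh reports ready, the E2E test can exercise the server through vLLM's standard OpenAI-compatible API. An illustrative smoke-test request against the server started above; the prompt and sampling parameters are arbitrary, not part of this change:

# Illustrative smoke test using the variables from env.sh.
curl -sf "${VLLM_API_BASE}/v1/chat/completions" \
    -H 'Content-Type: application/json' \
    -d "{
        \"model\": \"${SERVED_MODEL_NAME}\",
        \"messages\": [{\"role\": \"user\", \"content\": \"Say hello.\"}],
        \"max_tokens\": 32
    }"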
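In the CI workflow these scripts would typically run in sequence from the repository root, with start_vllm.sh launching the server in the background and wait_vllm.sh blocking until it answers. A sketch of the chaining; the exact workflow step that invokes them is not shown in this diff:

# Hypothetical CI step: start the server, then block until it is reachable.
bash ci/scripts/server/start_vllm.sh
bash ci/scripts/server/wait_vllm.sh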