添加 K100-vLLM-Patched-v2.0/entrypoint.sh

This commit is contained in:
2026-05-19 18:37:15 +08:00
parent ad2f32a585
commit 08aaffe144

View File

@@ -0,0 +1,24 @@
#!/bin/bash
#
# Container entrypoint that prepares the environment and launches vLLM.
#
# Steps:
#   1. Run /opt/fix_tokenizer.py; any failure aborts startup.
#   2. Run /opt/detect_head_size.py. When it exits with status 2 and prints
#      a value, export VLLM_USE_FLASH_ATTN_PA=0 before launching the server.
#      Any other non-zero outcome is reported on stderr and otherwise
#      ignored (the probe is best-effort).
#   3. Replace this shell with `vllm serve`, forwarding every argument given
#      to this script.
#
# Environment:
#   MODEL_DIR  Model path handed to `vllm serve` (default: /model).
set -euo pipefail

MODEL_DIR=${MODEL_DIR:-/model}
# NOTE(review): presumably the directory fix_tokenizer.py writes into; the
# helper is not passed this path, so both sides must agree on it -- confirm.
readonly FIX_TOKENIZER_DIR=/tmp/fixed_tokenizer

echo "[entrypoint] fixing tokenizer..."
python /opt/fix_tokenizer.py

echo "[entrypoint] checking head_size..."
# Capture the exit status through `||` so a non-zero result does not trip
# `set -e`; this avoids toggling errexit off and on around the call.
head_rc=0
head_out=$(python /opt/detect_head_size.py) || head_rc=$?

if [[ "$head_rc" -eq 2 && -n "$head_out" ]]; then
  export VLLM_USE_FLASH_ATTN_PA=0
  echo "[entrypoint] head_size=$head_out not in FlashAttention whitelist, switching to Triton backend (VLLM_USE_FLASH_ATTN_PA=0)"
elif [[ "$head_rc" -ne 0 ]]; then
  # Do not abort: the server can still start with its default backend, but
  # make the failed probe visible instead of swallowing it silently.
  echo "[entrypoint] warning: detect_head_size.py exited with status $head_rc (output: '$head_out'); leaving attention backend unchanged" >&2
fi

echo "[entrypoint] starting vllm..."
# exec so vllm replaces this shell and receives container signals directly.
exec vllm serve "$MODEL_DIR" \
  --tokenizer "$FIX_TOKENIZER_DIR" \
  "$@"