添加 K100-vLLM-Patched-v2.0/entrypoint.sh

2026-05-19 18:37:15 +08:00
parent ad2f32a585
commit 08aaffe144
1 changed files with 24 additions and 0 deletions
--- a/K100-vLLM-Patched-v2.0/entrypoint.sh
+++ b/K100-vLLM-Patched-v2.0/entrypoint.sh
@@ -0,0 +1,24 @@
+#!/bin/bash 
+set -e
+
+MODEL_DIR=${MODEL_DIR:-/model}
+FIX_TOKENIZER_DIR=/tmp/fixed_tokenizer
+
+echo "[entrypoint] fixing tokenizer..."
+python /opt/fix_tokenizer.py
+
+echo "[entrypoint] checking head_size..."
+set +e
+HEAD_OUT=$(python /opt/detect_head_size.py)
+RC=$?
+set -e
+
+if [ "$RC" = "2" ] && [ -n "$HEAD_OUT" ]; then
+    export VLLM_USE_FLASH_ATTN_PA=0
+    echo "[entrypoint] head_size=$HEAD_OUT not in FlashAttention whitelist, switching to Triton backend (VLLM_USE_FLASH_ATTN_PA=0)"
+fi
+
+echo "[entrypoint] starting vllm..."
+exec vllm serve "$MODEL_DIR" \
+  --tokenizer "$FIX_TOKENIZER_DIR" \
+  "$@"