diff --git a/K100-vLLM-Patched-v2.0/entrypoint.sh b/K100-vLLM-Patched-v2.0/entrypoint.sh
new file mode 100644
index 0000000..1e0eb51
--- /dev/null
+++ b/K100-vLLM-Patched-v2.0/entrypoint.sh
@@ -0,0 +1,24 @@
+#!/bin/bash 
+set -e
+
+MODEL_DIR=${MODEL_DIR:-/model}
+FIX_TOKENIZER_DIR=/tmp/fixed_tokenizer
+
+echo "[entrypoint] fixing tokenizer..."
+python /opt/fix_tokenizer.py
+
+echo "[entrypoint] checking head_size..."
+set +e
+HEAD_OUT=$(python /opt/detect_head_size.py)
+RC=$?
+set -e
+
+if [ "$RC" = "2" ] && [ -n "$HEAD_OUT" ]; then
+    export VLLM_USE_FLASH_ATTN_PA=0
+    echo "[entrypoint] head_size=$HEAD_OUT not in FlashAttention whitelist, switching to Triton backend (VLLM_USE_FLASH_ATTN_PA=0)"
+fi
+
+echo "[entrypoint] starting vllm..."
+exec vllm serve "$MODEL_DIR" \
+  --tokenizer "$FIX_TOKENIZER_DIR" \
+  "$@"
\ No newline at end of file