Files
enginex-S2-vllm-fix-tokenizer/entrypoint.sh

40 lines
1006 B
Bash
Raw Permalink Normal View History

2026-05-28 10:56:17 +08:00
#!/bin/bash
#
# Container entrypoint: optionally runs the tokenizer fix script, then
# replaces this process with `vllm serve`.
#
# Usage: entrypoint.sh [MODEL_DIR] [extra `vllm serve` arguments...]
#   MODEL_DIR  model directory (default: /model). All remaining arguments
#              are forwarded to `vllm serve` unchanged.
#
# Environment:
#   AUTO_FIX_TOKENIZER  "1" or "true": always run the fix.
#                       "auto" (default): run the fix only when
#                       tokenizer_config.json matches one of the patterns
#                       checked below.
#                       Any other value: never run the fix.
set -e

MODEL_DIR=${1:-/model}
# Drop the model-dir argument (if one was given) so "$@" holds only the
# extra arguments destined for vllm.
if (( $# > 0 )); then
  shift
fi

FIX_TOKENIZER_DIR=/tmp/fixed_tokenizer
AUTO_FIX=${AUTO_FIX_TOKENIZER:-auto}

echo "[entrypoint] model dir: $MODEL_DIR"

NEED_FIX=0
case "$AUTO_FIX" in
  1 | true)
    NEED_FIX=1
    ;;
  auto)
    tokenizer_config="$MODEL_DIR/tokenizer_config.json"
    if [[ -f "$tokenizer_config" ]]; then
      # A fix is needed when either pattern is present:
      #   * a TokenizersBackend / TiktokenTokenizer reference, or
      #   * extra_special_tokens given in list format.
      # -E with [[:space:]] avoids the GNU-only `\|` and `\s` extensions.
      if grep -Eq \
        -e 'TokenizersBackend|TiktokenTokenizer' \
        -e '"extra_special_tokens":[[:space:]]*\[' \
        -- "$tokenizer_config"; then
        NEED_FIX=1
      fi
    fi
    ;;
esac

# Built as an array so the option and its value stay separate words without
# relying on unquoted word-splitting.
tokenizer_args=()
if [[ "$NEED_FIX" -eq 1 ]]; then
  echo "[entrypoint] fixing tokenizer..."
  # NOTE(review): the fix script is invoked without arguments; presumably it
  # locates the model itself and writes its output to $FIX_TOKENIZER_DIR.
  # Confirm against /opt/fix_tokenizer.py.
  python3 /opt/fix_tokenizer.py
  tokenizer_args=(--tokenizer "$FIX_TOKENIZER_DIR")
else
  echo "[entrypoint] tokenizer OK, skip fix"
fi

echo "[entrypoint] starting vllm..."
# exec so vllm replaces this shell and receives signals directly.
exec vllm serve "$MODEL_DIR" "${tokenizer_args[@]}" "$@"