This commit is contained in:
134
.gitea/workflows/docker-build-push.yml
Normal file
134
.gitea/workflows/docker-build-push.yml
Normal file
@@ -0,0 +1,134 @@
|
||||
# Builds the repository's Docker image whenever a "v*" tag is pushed, pushes
# it to the configured registry, and notifies the image-verify API.
#
# The run scripts read DOCKER_REGISTRY, DOCKER_USERNAME, DOCKER_PASSWORD and
# FIXED_TOKEN from the environment; none of them is defined in this file.
name: Docker Build and Push

on:
  push:
    tags:
      - "v*"

jobs:
  docker:
    runs-on: amd64-ubuntu-24.04

    steps:
      - name: Clone repository
        run: |
          git clone "${{ gitea.server_url }}/${{ gitea.repository }}.git" .
          git checkout "${{ gitea.ref_name }}"

      - name: Set image metadata
        run: |
          # Lower-case the repository path and map "_" to "-" for the image name.
          IMAGE_NAME="$(echo "${{ gitea.repository }}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')"
          IMAGE="${DOCKER_REGISTRY}/${DOCKER_USERNAME}/${IMAGE_NAME}:${{ gitea.ref_name }}"

          # Append both values to $GITEA_ENV; later steps read $IMAGE.
          echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITEA_ENV"
          echo "IMAGE=${IMAGE}" >> "$GITEA_ENV"

      - name: Load and Validate Task Info
        run: |
          # Source the env file with auto-export switched on.
          set -a
          . .gitea/workflows/task_info.env
          set +a

          # Only FRAMEWORK is mandatory; GPU_TYPE and TASK_TYPE may be empty.
          for name in FRAMEWORK GPU_TYPE TASK_TYPE; do
            eval "value=\${${name}:-}"
            if [ "$name" = "FRAMEWORK" ] && [ -z "$value" ]; then
              echo "${name} is empty in .gitea/workflows/task_info.env"
              exit 1
            fi

            echo "${name}=${value}" >> "$GITEA_ENV"
          done

      - name: Validate Image Verify Metadata
        run: |
          if [ -z "${FIXED_TOKEN:-}" ]; then
            echo "FIXED_TOKEN is not configured on runner"
            exit 1
          fi

          if ! response="$(curl --silent --show-error --location --get 'https://modelhub.org.cn/adminApi/image-verify/validate' \
            --header "Xc-Token: ${FIXED_TOKEN}" \
            --data-urlencode "gpuType=${GPU_TYPE:-}" \
            --data-urlencode "taskType=${TASK_TYPE:-}")"; then
            echo "failed to call image verify validate API"
            exit 1
          fi

          # Accept only {"code": 0, "data": true}; anything else fails the job.
          VALIDATE_RESPONSE="$response" python3 - <<'PY'
          import json
          import os
          import sys

          raw = os.environ.get("VALIDATE_RESPONSE", "")
          try:
              body = json.loads(raw)
          except json.JSONDecodeError:
              print("image verify validate API returned invalid JSON")
              print(raw)
              sys.exit(1)

          # A JSON array or scalar has no .get(); report it instead of crashing.
          if not isinstance(body, dict):
              print("image verify validate API returned an unexpected JSON payload")
              print(raw)
              sys.exit(1)

          if body.get("code") == 0 and body.get("data") is True:
              print("image verify metadata validation passed")
              sys.exit(0)

          message = body.get("message") or "unknown error"
          print(f"image verify metadata validation failed: {message}")
          print(raw)
          sys.exit(1)
          PY

      - name: Login to Docker Registry
        run: |
          # The password goes through stdin so it never appears in argv.
          echo "$DOCKER_PASSWORD" | docker login "$DOCKER_REGISTRY" \
            -u "$DOCKER_USERNAME" \
            --password-stdin

      - name: Build Docker Image
        run: |
          docker build -t "$IMAGE" .

      - name: Push Docker Image
        run: |
          for attempt in 1 2 3; do
            echo "Starting docker push attempt ${attempt}/3 for ${IMAGE}"
            docker push "$IMAGE" &
            PUSH_PID=$!

            # Print a heartbeat once a minute while the push runs in the background.
            while kill -0 "$PUSH_PID" 2>/dev/null; do
              echo "docker push is still running at $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
              sleep 60
            done

            if wait "$PUSH_PID"; then
              echo "docker push completed successfully"
              exit 0
            fi

            echo "docker push failed on attempt ${attempt}/3"
            # Back off before the next try; waiting after the last one is pointless.
            if [ "$attempt" -lt 3 ]; then
              sleep 30
            fi
          done

          echo "docker push failed after 3 attempts"
          exit 1

      - name: Notify Image Verify
        env:
          CREATE_BY: ${{ gitea.actor }}
          REPO_URL: ${{ gitea.server_url }}/${{ gitea.repository }}
          IMAGE_TAG: ${{ gitea.ref_name }}
        run: |
          if [ -z "${FIXED_TOKEN:-}" ]; then
            echo "FIXED_TOKEN is not configured on runner"
            exit 1
          fi

          # Let json.dumps do the quoting so that quotes or backslashes in any
          # value cannot produce a malformed request body.
          payload="$(python3 - <<'PY'
          import json
          import os


          def env(name):
              return os.environ.get(name, "")


          print(json.dumps({
              "framework": env("FRAMEWORK"),
              "gpuType": env("GPU_TYPE"),
              "imageUrl": env("IMAGE"),
              "taskType": env("TASK_TYPE"),
              "createBy": env("CREATE_BY"),
              "repoUrl": env("REPO_URL"),
              "tag": env("IMAGE_TAG"),
          }))
          PY
          )"

          curl --silent --show-error --fail-with-body --location --request POST 'https://modelhub.org.cn/adminApi/image-verify' \
            --header "Xc-Token: ${FIXED_TOKEN}" \
            --header 'Content-Type: application/json' \
            --data-raw "$payload"
|
||||
|
||||
|
||||
3
.gitea/workflows/task_info.env
Normal file
3
.gitea/workflows/task_info.env
Normal file
@@ -0,0 +1,3 @@
|
||||
FRAMEWORK=vllm_fix_tokenizer
|
||||
GPU_TYPE=Biren_166m
|
||||
TASK_TYPE=text-generation
|
||||
30
Dockerfile
Normal file
30
Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
||||
# Base image that every instruction below is layered on.
FROM harbor.4pd.io/modelhubxc/enginex/xc-llm-biren166m:26.01
|
||||
ENV PKG_CONFIG_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/lib/pkgconfig:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib/pkgconfig
|
||||
ENV CMAKE_INCLUDE_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suairan/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/deepep/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/surtc/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/surand/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudnn-eager/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sufft/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/succl/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sublas/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brjpegdec/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/include/sudnn:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brbpp/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/include
|
||||
ENV LIBVA_DRIVER_NAME=bevc
|
||||
ENV PWD=/workspace
|
||||
ENV HOME=/root
|
||||
ENV LANG=C.UTF-8
|
||||
ENV PYTHONPATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/python:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/python/tvm/python:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/python
|
||||
ENV CPLUS_INCLUDE_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suairan/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/deepep/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/surtc/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/surand/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudnn-eager/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sufft/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/succl/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sublas/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brjpegdec/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/include/sudnn:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brbpp/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/include
|
||||
ENV LIBRARY_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suairan/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/deepep/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/surtc/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/surand/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/suprofiler/sudx/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudnn-eager/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sufft/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudbg/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/succl/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/sublas/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brjpegdec/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brbpp/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib/dri:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib
|
||||
ENV SHLVL=0
|
||||
ENV LD_LIBRARY_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suairan/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/deepep/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/surtc/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/surand/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/suprofiler/sudx/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudnn-eager/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sufft/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudbg/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/succl/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/sublas/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brjpegdec/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brbpp/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib/dri:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib
|
||||
ENV SUDNN_ENABLE_ANY_BATCH=1
|
||||
ENV BIREN_ENV_SETTED=1
|
||||
ENV LIBVA_DRIVERS_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib/dri
|
||||
ENV LC_ALL=C.UTF-8
|
||||
ENV PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suprofiler/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/suprof-cli/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/suPerfViz/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa-sanitizer/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/sucst/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudbg/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/bin:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV SUFILE_ENV_PATH_JSON=/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/sufile.json
|
||||
ENV C_INCLUDE_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suairan/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/deepep/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/surtc/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/surand/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudnn-eager/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sufft/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/succl/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sublas/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brjpegdec/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/include/sudnn:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/brbpp/include:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/include
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV SUPA_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/supa
|
||||
ENV CMAKE_LIBRARY_PATH=/usr/local/birensupa/sdk/1.10.0.0.rc1/suairan/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/deepep/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brSimulator/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/surtc/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/tensor-engine/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sutlass/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/surand/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/supti/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/suprofiler/sudx/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/supa/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sulib/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudnn-eager/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sufft/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/sudbg/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/succl/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/sublas/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/libsufile/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brperfworks/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brjpegdec/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brffmpeg/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brcc/lib:/usr/local/birensupa/sdk/1.10.0.0.rc1/brbpp/lib/x86_64-linux-gnu:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib/dri:/usr/local/birensupa/sdk/1.10.0.0.rc1/bevc/lib
|
||||
ENV _=/usr/bin/env
|
||||
|
||||
# Install the tokenizer helpers and the launcher script into /opt.
COPY fix_tokenizer.py detect_tokenizer.py entrypoint.sh /opt/
RUN chmod +x /opt/entrypoint.sh

# Exec-form entrypoint: the launcher receives the container arguments directly.
ENTRYPOINT ["/opt/entrypoint.sh"]
|
||||
184
README.md
Normal file
184
README.md
Normal file
@@ -0,0 +1,184 @@
|
||||
# vLLM Tokenizer 自动修复方案
|
||||
|
||||
## 1. 背景
|
||||
|
||||
在使用 vLLM 部署部分模型时,可能会遇到如下报错:
|
||||
|
||||
```
|
||||
|
||||
ValueError: Tokenizer class TokenizersBackend does not exist or is not currently imported.
|
||||
|
||||
```
|
||||
|
||||
该问题通常由 transformers 的 tokenizer 加载机制导致:
|
||||
|
||||
- tokenizer_config.json 中指定了不存在或不兼容的 tokenizer_class
|
||||
- 开启 trust_remote_code=True 时,transformers 会强制加载该 class
|
||||
- vLLM 无法通过参数 override tokenizer class
|
||||
|
||||
---
|
||||
|
||||
## 2. 方案目标
|
||||
|
||||
本方案实现:
|
||||
|
||||
```
|
||||
|
||||
无需修改模型文件
|
||||
无需修改启动命令
|
||||
自动修复 tokenizer 并启动 vLLM
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. 核心思路
|
||||
|
||||
在容器启动时:
|
||||
|
||||
```
|
||||
|
||||
entrypoint.sh
|
||||
↓
|
||||
检测 tokenizer 是否异常
|
||||
↓
|
||||
复制 tokenizer 文件 → /tmp/fixed_tokenizer
|
||||
↓
|
||||
修复 tokenizer_config.json
|
||||
↓
|
||||
vllm serve --tokenizer /tmp/fixed_tokenizer
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 支持的自动修复场景
|
||||
|
||||
| 原 tokenizer_class | 修复为 |
|
||||
|-------------------|--------|
|
||||
| TokenizersBackend | PreTrainedTokenizerFast |
|
||||
| TiktokenTokenizer | GPT2TokenizerFast |
|
||||
| 缺失 tokenizer_config | 自动生成 |
|
||||
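| SentencePiece | LlamaTokenizer |

> 注:实际写入的 `tokenizer_class` 由模型目录中存在的文件决定,与原 `tokenizer_class` 无关:存在 `tokenizer.json` → `PreTrainedTokenizerFast`;否则存在 `tokenizer.model` → `LlamaTokenizer`;否则同时存在 `vocab.json` 与 `merges.txt` → `GPT2TokenizerFast`;其余情况 → `PreTrainedTokenizerFast`。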
|
||||
|
||||
### 修复 extra_special_tokens 格式
|
||||
|
||||
当 `extra_special_tokens` 为 list 格式时,自动转换为 dict 格式:
|
||||
|
||||
```json
|
||||
// 修复前
|
||||
"extra_special_tokens": ["<|im_start|>", "<|im_end|>", "<|box_start|>", "<|box_end|>", ...]
|
||||
|
||||
// 修复后
|
||||
"extra_special_tokens": {
|
||||
"<|im_start|>": "<|im_start|>",
|
||||
"<|im_end|>": "<|im_end|>",
|
||||
"<|box_start|>": "<|box_start|>",
|
||||
"<|box_end|>": "<|box_end|>",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. 生成的 tokenizer 目录
|
||||
|
||||
```
|
||||
/tmp/fixed_tokenizer/
|
||||
├── tokenizer.json
|
||||
├── tokenizer_config.json (已修复)
|
||||
├── special_tokens_map.json (可选)
|
||||
├── vocab.json / merges.txt (如需要)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 日志说明
|
||||
|
||||
### 正常情况
|
||||
|
||||
```
|
||||
[entrypoint] tokenizer OK, skip fix
|
||||
```
|
||||
|
||||
### 自动修复
|
||||
|
||||
```
|
||||
[entrypoint] fixing tokenizer...
|
||||
[fix] override bad tokenizer_class: TokenizersBackend → PreTrainedTokenizerFast
|
||||
[fix] converted extra_special_tokens from list (13 items) to dict format
|
||||
```
|
||||
|
||||
触发条件(环境变量 `AUTO_FIX_TOKENIZER=auto` 时,此为默认值;设为 `1` 或 `true` 则始终执行修复):
|
||||
- tokenizer_config.json 包含 `TokenizersBackend` 或 `TiktokenTokenizer`
|
||||
- tokenizer_config.json 中 `extra_special_tokens` 为 list 格式(`"extra_special_tokens": [`)
|
||||
|
||||
---
|
||||
|
||||
## 7. 验证方法
|
||||
|
||||
进入容器执行:
|
||||
|
||||
```python
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
tok = AutoTokenizer.from_pretrained("/tmp/fixed_tokenizer")
|
||||
|
||||
print(tok.encode("hello world"))
|
||||
print(tok.decode(tok.encode("hello world")))
|
||||
```
|
||||
|
||||
确保:
|
||||
|
||||
```
|
||||
encode → decode 可逆
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. 注意事项
|
||||
|
||||
### ⚠️ 1. tokenizer 文件必须存在
|
||||
|
||||
至少需要:
|
||||
|
||||
| 类型 | 必需文件 |
|
||||
| -------------- | ----------------------- |
|
||||
| Fast tokenizer | tokenizer.json |
|
||||
| BPE | vocab.json + merges.txt |
|
||||
| SentencePiece | tokenizer.model |
|
||||
|
||||
---
|
||||
|
||||
### ⚠️ 2. 不影响模型推理
|
||||
|
||||
本方案:
|
||||
|
||||
```
|
||||
仅影响 tokenizer(文本 ↔ token)
|
||||
不影响模型计算(attention / KV cache)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### ⚠️ 3. 特殊 token 风险
|
||||
|
||||
需确认:
|
||||
|
||||
```
|
||||
bos_token / eos_token / pad_token 一致
|
||||
```
|
||||
|
||||
否则可能影响生成结果
|
||||
|
||||
---
|
||||
|
||||
## 9. 总结
|
||||
|
||||
本方案通过在容器启动阶段引入 tokenizer 修复逻辑,实现:
|
||||
|
||||
```
|
||||
“模型不动,运行时自适应兼容”
|
||||
|
||||
```
|
||||
25
detect_tokenizer.py
Normal file
25
detect_tokenizer.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import os
|
||||
import json
|
||||
|
||||
def detect(model_dir):
    """Classify the tokenizer files found in *model_dir*.

    Args:
        model_dir: Directory that holds the model's tokenizer files.

    Returns:
        A ``(kind, tokenizer_class)`` tuple. ``kind`` is decided by the files
        present, checked in this order: ``"fast"`` for ``tokenizer.json``,
        ``"sentencepiece"`` for ``tokenizer.model``, ``"bpe"`` when both
        ``vocab.json`` and ``merges.txt`` exist, otherwise ``"unknown"``.
        ``tokenizer_class`` is the string declared in
        ``tokenizer_config.json``; it is ``""`` when the file is missing, is
        not a JSON object, or does not declare a string class.

    Raises:
        OSError: If *model_dir* cannot be listed or the config cannot be read.
        json.JSONDecodeError: If ``tokenizer_config.json`` is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")

    cls = ""
    if os.path.exists(cfg_path):
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
        # Guard against a non-object document and a null / non-string class so
        # callers always receive a plain string.
        if isinstance(cfg, dict):
            declared = cfg.get("tokenizer_class")
            if isinstance(declared, str):
                cls = declared

    files = set(os.listdir(model_dir))

    if "tokenizer.json" in files:
        return "fast", cls

    if "tokenizer.model" in files:
        return "sentencepiece", cls

    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", cls

    return "unknown", cls
|
||||
39
entrypoint.sh
Normal file
39
entrypoint.sh
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/bin/bash
# Container entrypoint: optionally repair the model's tokenizer config, then
# exec `vllm serve`.
#
# Usage: entrypoint.sh [MODEL_DIR] [extra `vllm serve` arguments...]
#   MODEL_DIR defaults to /model.
#
# Environment:
#   AUTO_FIX_TOKENIZER  "1" or "true": always run the fixer.
#                       "auto" (default): run it only when
#                       tokenizer_config.json matches a known-bad pattern.
#                       Any other value: never run it.
set -e

MODEL_DIR=${1:-/model}
# With no arguments there is nothing to shift; that must not abort under -e.
shift || true

FIX_TOKENIZER_DIR=/tmp/fixed_tokenizer
AUTO_FIX=${AUTO_FIX_TOKENIZER:-auto}

echo "[entrypoint] model dir: $MODEL_DIR"

NEED_FIX=0

if [ "$AUTO_FIX" = "1" ] || [ "$AUTO_FIX" = "true" ]; then
    NEED_FIX=1
elif [ "$AUTO_FIX" = "auto" ]; then
    if [ -f "$MODEL_DIR/tokenizer_config.json" ]; then
        # Tokenizer classes known to fail to load.
        if grep -Eq 'TokenizersBackend|TiktokenTokenizer' "$MODEL_DIR/tokenizer_config.json"; then
            NEED_FIX=1
        fi
        # extra_special_tokens stored as a list instead of a dict.
        if grep -q '"extra_special_tokens":[[:space:]]*\[' "$MODEL_DIR/tokenizer_config.json"; then
            NEED_FIX=1
        fi
    fi
fi

# Optional arguments live in an array so no unquoted expansion is needed.
TOKENIZER_ARGS=()

if [ "$NEED_FIX" -eq 1 ]; then
    echo "[entrypoint] fixing tokenizer..."
    # fix_tokenizer.py takes both directories from the environment; hand them
    # over explicitly so a MODEL_DIR given on the command line is honoured.
    MODEL_DIR="$MODEL_DIR" FIX_TOKENIZER_DIR="$FIX_TOKENIZER_DIR" \
        python3 /opt/fix_tokenizer.py
    TOKENIZER_ARGS=(--tokenizer "$FIX_TOKENIZER_DIR")
else
    echo "[entrypoint] tokenizer OK, skip fix"
fi

echo "[entrypoint] starting vllm..."

exec vllm serve "$MODEL_DIR" "${TOKENIZER_ARGS[@]}" "$@"
|
||||
69
fix_tokenizer.py
Normal file
69
fix_tokenizer.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
from detect_tokenizer import detect
|
||||
|
||||
# Source and destination directories, both taken from the environment.
MODEL_DIR = os.environ.get("MODEL_DIR", "/model")
OUT_DIR = os.environ.get("FIX_TOKENIZER_DIR", "/tmp/fixed_tokenizer")

# Every file that may be needed to load the tokenizer from OUT_DIR.
TOKENIZER_FILES = (
    "tokenizer.json",
    "tokenizer_config.json",
    "special_tokens_map.json",
    "vocab.json",
    "merges.txt",
    "tokenizer.model",
)

# tokenizer_class written for each kind reported by detect().
CLASS_BY_KIND = {
    "fast": "PreTrainedTokenizerFast",
    "sentencepiece": "LlamaTokenizer",
    "bpe": "GPT2TokenizerFast",
}
DEFAULT_CLASS = "PreTrainedTokenizerFast"

# Original classes whose replacement is worth logging.
BAD_CLASSES = ("TokenizersBackend", "TiktokenTokenizer")


def copy_if_exists(name):
    """Copy ``MODEL_DIR/name`` into ``OUT_DIR`` when the source exists."""
    src = os.path.join(MODEL_DIR, name)
    if os.path.exists(src):
        shutil.copy(src, OUT_DIR)


def _special_tokens_to_dict(tokens):
    """Convert a list of special tokens to the ``{token: token}`` dict form.

    String entries map to themselves. Dict entries are presumably serialized
    AddedToken objects carrying their text under ``"content"`` (TODO confirm);
    they are keyed by that text. Anything else cannot be used as a key and is
    skipped with a warning instead of aborting the whole fix.
    """
    converted = {}
    for token in tokens:
        if isinstance(token, dict):
            token = token.get("content")
        if isinstance(token, str):
            converted[token] = token
        else:
            print(f"[fix] skipped unsupported extra_special_tokens entry: {token!r}")
    return converted


def main():
    """Copy the tokenizer files to OUT_DIR and rewrite tokenizer_config.json.

    Raises:
        SystemExit: If MODEL_DIR is not a directory or the copied
            tokenizer_config.json is not a JSON object.
    """
    if not os.path.isdir(MODEL_DIR):
        raise SystemExit(f"[fix_tokenizer] model dir not found: {MODEL_DIR}")

    os.makedirs(OUT_DIR, exist_ok=True)

    for name in TOKENIZER_FILES:
        copy_if_exists(name)

    typ, orig_cls = detect(MODEL_DIR)

    cfg_path = os.path.join(OUT_DIR, "tokenizer_config.json")

    if os.path.exists(cfg_path):
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
    else:
        cfg = {}

    if not isinstance(cfg, dict):
        raise SystemExit("[fix_tokenizer] tokenizer_config.json is not a JSON object")

    # The class is chosen from the files present, whatever the config declared.
    cfg["tokenizer_class"] = CLASS_BY_KIND.get(typ, DEFAULT_CLASS)

    if orig_cls in BAD_CLASSES:
        print(f"[fix] override bad tokenizer_class: {orig_cls} → {cfg['tokenizer_class']}")

    # extra_special_tokens: list form -> dict form.
    extra = cfg.get("extra_special_tokens")
    if isinstance(extra, list):
        cfg["extra_special_tokens"] = _special_tokens_to_dict(extra)
        print(f"[fix] converted extra_special_tokens from list ({len(extra)} items) to dict format")

    # Write the patched config back; keep non-ASCII tokens readable.
    with open(cfg_path, "w", encoding="utf-8") as f:
        json.dump(cfg, f, ensure_ascii=False, indent=2)

    print(f"[fix_tokenizer] done → {OUT_DIR}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user