# Image built on the enginex-hygon vLLM 0.9.2 base with local fixes layered on top.
# NOTE(review): the base image is pinned by tag only; consider pinning by digest
# if fully reproducible builds are required.
FROM git.modelhub.org.cn:9443/enginex-hygon/vllm:0.9.2

# Fix 1: library-level patch for transformers.
# The patch script is executed once at build time; it remains in /tmp in the image.
COPY patch.py /tmp/patch.py
RUN python3 /tmp/patch.py

# Fix 2: bypass the head_size validation of the Triton backend on the ROCm platform.
# Also applied at build time, in the same way as fix 1.
COPY patch_triton.py /tmp/patch_triton.py
RUN python3 /tmp/patch_triton.py

# Fix 3: runtime tokenizer configuration fix scripts.
# These are only copied here, not executed during the build
# (presumably invoked from entrypoint.sh at container start -- confirm).
COPY detect_tokenizer.py /opt/detect_tokenizer.py
COPY fix_tokenizer.py /opt/fix_tokenizer.py

# Fix 4: head_size detection used to switch the attention backend automatically.
# Copied only, like the fix 3 scripts.
COPY detect_head_size.py /opt/detect_head_size.py

# Install the entrypoint wrapper and make sure it is executable.
COPY entrypoint.sh /opt/entrypoint.sh
RUN chmod +x /opt/entrypoint.sh

# Exec (JSON-array) form, so the script is started directly rather than via /bin/sh -c.
ENTRYPOINT ["/opt/entrypoint.sh"]