This commit is contained in:
Chranos
2026-04-02 14:07:42 +08:00
parent 24df76db9d
commit 37627677c6
2 changed files with 212 additions and 0 deletions

78
Dockerfile Normal file
View File

@@ -0,0 +1,78 @@
# syntax=docker/dockerfile:1
# vLLM for VastAI VA16 (VACC)
#
# This build PRODUCES the image tagged git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03
# (the base image is python:3.12-slim — the original header mislabeled the
# output tag as the base).
#
# Build:
#   docker build -t git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 .
#
# Run example:
#   docker run --rm -it --device /dev/vacc0 \
#     -v /tmp/va16_model_cache:/models \
#     -p 8000:8000 \
#     git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 \
#     python -m vllm.entrypoints.openai.api_server \
#       --model /models/leaderboard/modelHubXC/Qwen/Qwen2-1.5B-Instruct \
#       --host 0.0.0.0 --port 8000
FROM python:3.12-slim

# Build-time only: silence apt prompts. Declared as ARG (not ENV) so it does
# not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# System dependencies (alphabetical, no recommends; apt lists removed in the
# same layer so they never persist in the image).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libnuma-dev \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies — vLLM v0.11.0 core requirements.
# Installed before the source COPY so this heavy layer stays cached across
# code-only changes. List sorted alphabetically for diffability;
# uvicorn[standard] quoted so the shell cannot glob the brackets (SC2102).
# NOTE(review): versions are intentionally unpinned here; pin them (or move
# to a requirements lockfile) for reproducible builds.
RUN pip install --no-cache-dir \
        aiohttp \
        blake3 \
        compressed-tensors \
        depyf \
        fastapi \
        filelock \
        gguf \
        importlib-metadata \
        lm-format-enforcer \
        mistral-common \
        msgspec \
        numpy \
        openai \
        outlines \
        partial-json-parser \
        pillow \
        prometheus-client \
        psutil \
        py-cpuinfo \
        pydantic \
        pyzmq \
        ray \
        sentencepiece \
        tokenizers \
        torch \
        transformers \
        typing_extensions \
        "uvicorn[standard]"

WORKDIR /workspace/vllm

# Copy the project source. Keep a .dockerignore (.git, caches, model files)
# so the context stays small and secrets are never copied in.
COPY . .

# Make vllm, vllm_vacc and torch_vacc importable from the source tree.
ENV PYTHONPATH="/workspace/vllm:${PYTHONPATH}"

# VA16 runtime defaults, grouped in one instruction.
ENV VLLM_VACC_KVCACHE_SPACE=16 \
    VLLM_USE_V1=1 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn

# API port (documentation only — publish with -p at run time).
EXPOSE 8000

# Let orchestrators detect a wedged server; the vLLM OpenAI-compatible server
# exposes GET /health. Generous start-period because model load can be slow.
# NOTE(review): confirm the /health route and a suitable start-period for the
# models served on VA16.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:8000/health || exit 1

# NOTE(review): image runs as root — presumably required for /dev/vacc0
# device access; if not, add a non-root USER before ENTRYPOINT.

# Default: OpenAI-compatible API server. Exec (JSON) form so the process is
# PID 1 and receives SIGTERM; CMD holds the overridable default arguments.
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
CMD ["--host", "0.0.0.0", "--port", "8000"]