# syntax=docker/dockerfile:1
# vLLM for VastAI VA16 (VACC)
# Base image: git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03
#
# Build:
#   docker build -t git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 .
#
# Run example:
#   docker run --rm -it --device /dev/vacc0 \
#     -v /tmp/va16_model_cache:/models \
#     -p 8000:8000 \
#     git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 \
#     python -m vllm.entrypoints.openai.api_server \
#       --model /models/leaderboard/modelHubXC/Qwen/Qwen2-1.5B-Instruct \
#       --host 0.0.0.0 --port 8000

FROM python:3.12-slim

# Build-time only: silence apt prompts without polluting the runtime
# environment (ARG, not ENV — it does not persist into the container).
ARG DEBIAN_FRONTEND=noninteractive

# System dependencies. The apt lists are removed in the same layer that
# creates them so the package index never lands in the image.
# NOTE(review): package versions are unpinned (hadolint DL3008) — pin
# them if reproducible builds are required.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libnuma-dev \
    && rm -rf /var/lib/apt/lists/*
# Python dependencies — vLLM v0.11.0 core requirements.
# "uvicorn[standard]" is quoted: unquoted, the brackets are a shell glob
# pattern and could be expanded by /bin/sh if a matching file exists.
# NOTE(review): versions are unpinned (hadolint DL3013); pin them for
# reproducible builds.
RUN pip install --no-cache-dir \
        torch \
        numpy \
        transformers \
        tokenizers \
        sentencepiece \
        fastapi \
        "uvicorn[standard]" \
        pydantic \
        aiohttp \
        openai \
        pillow \
        prometheus-client \
        py-cpuinfo \
        msgspec \
        gguf \
        importlib-metadata \
        partial-json-parser \
        mistral-common \
        lm-format-enforcer \
        outlines \
        typing_extensions \
        filelock \
        pyzmq \
        psutil \
        ray \
        blake3 \
        compressed-tensors \
        depyf
WORKDIR /workspace/vllm

# Copy the project code.
# NOTE(review): add a .dockerignore (.git, model caches, build output)
# so this COPY stays small and does not bust the cache unnecessarily.
COPY . .

# Make vllm / vllm_vacc / torch_vacc importable from the source tree.
# The python:3.12-slim base defines no PYTHONPATH, so the previous
# ":${PYTHONPATH}" suffix expanded to a trailing ':' — an empty entry,
# which puts the current working directory on sys.path. Set the path
# directly instead.
ENV PYTHONPATH=/workspace/vllm

# VA16 runtime defaults (grouped: one logical configuration step).
ENV VLLM_VACC_KVCACHE_SPACE=16 \
    VLLM_USE_V1=1 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn
# 暴露 API 端口
|
||
EXPOSE 8000
|
||
|
||
# 默认启动 OpenAI 兼容 API 服务
|
||
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
|
||
CMD ["--host", "0.0.0.0", "--port", "8000"]
|