# vLLM for VastAI VA16 (VACC)
#
# Image: git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03
#
# Build:
# docker build -t git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 .
#
# Run example (flags after the image name are appended to the ENTRYPOINT
# defined below, so the server command itself does not need to be repeated):
# docker run --rm -it --device /dev/vacc0 \
#   -v /tmp/va16_model_cache:/models \
#   -p 8000:8000 \
#   git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 \
#   --model /models/leaderboard/modelHubXC/Qwen/Qwen2-1.5B-Instruct \
#   --host 0.0.0.0 --port 8000
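#
# Once the server is up, the OpenAI-compatible endpoints can be exercised with
# plain HTTP calls, for example (model path taken from the run example above):
# curl http://localhost:8000/v1/models
# curl http://localhost:8000/v1/completions \
#   -H "Content-Type: application/json" \
#   -d '{"model": "/models/leaderboard/modelHubXC/Qwen/Qwen2-1.5B-Instruct", "prompt": "Hello", "max_tokens": 16}'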
FROM python:3.12-slim

ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    git \
    libnuma-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
# Core dependencies for vLLM v0.11.0
RUN pip install --no-cache-dir \
    torch \
    numpy \
    transformers \
    tokenizers \
    sentencepiece \
    fastapi \
    "uvicorn[standard]" \
    pydantic \
    aiohttp \
    openai \
    pillow \
    prometheus-client \
    py-cpuinfo \
    msgspec \
    gguf \
    importlib-metadata \
    partial-json-parser \
    mistral-common \
    lm-format-enforcer \
    outlines \
    typing_extensions \
    filelock \
    pyzmq \
    psutil \
    ray \
    blake3 \
    compressed-tensors \
    depyf
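# Optional build-time sanity check (a minimal sketch; uncomment to fail the
# build early if a core dependency cannot be imported):
# RUN python -c "import torch, transformers, fastapi, ray"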
WORKDIR /workspace/vllm

# Copy the project code
COPY . .

# Set the Python path so that vllm, vllm_vacc and torch_vacc can be imported
ENV PYTHONPATH="/workspace/vllm:${PYTHONPATH}"
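# Quick smoke test that the path is picked up inside the container (assumes the
# copied tree actually provides the vllm_vacc and torch_vacc packages):
# docker run --rm --entrypoint python git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 \
#   -c "import vllm, vllm_vacc, torch_vacc"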
# Default VA16 environment variables
ENV VLLM_VACC_KVCACHE_SPACE=16
ENV VLLM_USE_V1=1
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
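# These defaults can be overridden per container at run time, e.g. to give the
# KV cache more room (32 is only an illustrative value):
# docker run -e VLLM_VACC_KVCACHE_SPACE=32 ... git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03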
# Expose the API port
EXPOSE 8000

# Start the OpenAI-compatible API server by default
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
CMD ["--host", "0.0.0.0", "--port", "8000"]
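# CMD only supplies default flags: anything passed after the image name at
# `docker run` time replaces it and is appended to the ENTRYPOINT, so extra
# server options (or --help to list them) can be added without rebuilding:
# docker run --rm git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 --help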