Files
enginex-vastai-va16-vllm/Dockerfile
Chranos 37627677c6 update
2026-04-02 14:07:42 +08:00

79 lines
1.9 KiB
Docker
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# vLLM for VastAI VA16 (VACC)
#
# Build (NOTE(review): this tags the *output* image with the registry name;
# the actual base image is python:3.12-slim below — the original comment
# calling this tag the "base image" appears stale):
#   docker build -t git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 .
#
# Run example:
#   docker run --rm -it --device /dev/vacc0 \
#     -v /tmp/va16_model_cache:/models \
#     -p 8000:8000 \
#     git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 \
#     python -m vllm.entrypoints.openai.api_server \
#       --model /models/leaderboard/modelHubXC/Qwen/Qwen2-1.5B-Instruct \
#       --host 0.0.0.0 --port 8000

# Minimal, version-tagged base keeps the image small and the build stable.
FROM python:3.12-slim

# Build-time only: suppress interactive apt prompts. Declared as ARG (not ENV)
# so the setting does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies.
# update + install share one layer (avoids the stale apt-cache bug), recommends
# are skipped to keep the image lean, and the package lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
libnuma-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies: core runtime deps for vLLM v0.11.0.
# --no-cache-dir keeps pip's download cache out of the layer.
# "uvicorn[standard]" is quoted so the shell cannot glob-expand the brackets
# (ShellCheck SC2102); the list is alphabetized for diffability.
# NOTE(review): versions are unpinned (hadolint DL3013) — rebuilds are not
# reproducible; consider a pinned requirements.txt / constraints file.
RUN pip install --no-cache-dir \
    aiohttp \
    blake3 \
    compressed-tensors \
    depyf \
    fastapi \
    filelock \
    gguf \
    importlib-metadata \
    lm-format-enforcer \
    mistral-common \
    msgspec \
    numpy \
    openai \
    outlines \
    partial-json-parser \
    pillow \
    prometheus-client \
    psutil \
    py-cpuinfo \
    pydantic \
    pyzmq \
    ray \
    sentencepiece \
    tokenizers \
    torch \
    transformers \
    typing_extensions \
    "uvicorn[standard]"
# Absolute app directory; created automatically if missing.
WORKDIR /workspace/vllm

# Copy the project source (the vllm, vllm_vacc, torch_vacc trees) into place.
# NOTE(review): ensure a .dockerignore excludes .git/ and caches so they don't
# bloat the context or bust this layer's cache.
COPY . .

# Make the project importable without installation. The python:3.12-slim base
# defines no PYTHONPATH, so do not append ":${PYTHONPATH}": the undefined var
# would leave a trailing ':' — an empty entry that Python treats as the
# current working directory on sys.path.
ENV PYTHONPATH=/workspace/vllm

# VA16 runtime defaults, grouped in one instruction:
#   VLLM_VACC_KVCACHE_SPACE - KV-cache budget (presumably GiB, matching
#                             vLLM's *_KVCACHE_SPACE convention — confirm)
#   VLLM_USE_V1             - select the vLLM V1 engine
#   VLLM_WORKER_MULTIPROC_METHOD - spawn workers instead of fork
ENV VLLM_VACC_KVCACHE_SPACE=16 \
    VLLM_USE_V1=1 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn
# Document the API port (EXPOSE does not publish it; use -p at run time).
EXPOSE 8000

# Mark the container unhealthy when the API stops answering. Long start
# period allows for model loading; curl is installed in the apt layer above.
# NOTE(review): upstream vLLM's api_server serves /health — confirm the VACC
# fork keeps that endpoint.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:8000/health || exit 1

# Exec-form entrypoint: the server runs as PID 1 and receives SIGTERM directly
# from `docker stop`. Default OpenAI-compatible API server.
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
# Default arguments only; override at `docker run` to change host/port or add
# model flags (see the run example in the header).
CMD ["--host", "0.0.0.0", "--port", "8000"]