# syntax=docker/dockerfile:1
# vLLM for VastAI VA16 (VACC)
# Base image tag: git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03
#
# Build:
#   docker build -t git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 .
#
# Run example:
#   docker run --rm -it --device /dev/vacc0 \
#     -v /tmp/va16_model_cache:/models \
#     -p 8000:8000 \
#     git.modelhub.org.cn:9443/enginex/xc-llm-va16:26.03 \
#     python -m vllm.entrypoints.openai.api_server \
#       --model /models/leaderboard/modelHubXC/Qwen/Qwen2-1.5B-Instruct \
#       --host 0.0.0.0 --port 8000

FROM python:3.12-slim

# Build-time only: keep apt non-interactive. ARG (not ENV) so the setting
# does not leak into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# System dependencies: toolchain for any pip sdist builds, git for VCS-based
# installs, libnuma for NUMA-aware allocation. update + install are combined
# in one layer (stale apt-cache rule) and the list cache is removed in the
# same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libnuma-dev \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies — core requirements of vLLM v0.11.0.
# Sorted alphabetically for diffability; "uvicorn[standard]" is quoted so the
# brackets are never subject to shell globbing.
# NOTE(review): versions are unpinned — pin them (or switch to a
# requirements.txt with hashes) for reproducible builds.
RUN pip install --no-cache-dir \
        aiohttp \
        blake3 \
        compressed-tensors \
        depyf \
        fastapi \
        filelock \
        gguf \
        importlib-metadata \
        lm-format-enforcer \
        mistral-common \
        msgspec \
        numpy \
        openai \
        outlines \
        partial-json-parser \
        pillow \
        prometheus-client \
        psutil \
        py-cpuinfo \
        pydantic \
        pyzmq \
        ray \
        sentencepiece \
        tokenizers \
        torch \
        transformers \
        typing_extensions \
        "uvicorn[standard]"

WORKDIR /workspace/vllm

# Copy the project source tree. Provide a .dockerignore (.git, caches, build
# output) to keep the context small and avoid leaking unwanted files.
COPY . .

# Ensure vllm, vllm_vacc and torch_vacc are importable from the source tree.
ENV PYTHONPATH="/workspace/vllm:${PYTHONPATH}"

# VA16 runtime defaults, grouped into one instruction.
ENV VLLM_VACC_KVCACHE_SPACE=16 \
    VLLM_USE_V1=1 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn

# API port (documentation only — publish with -p at run time).
EXPOSE 8000

# Probe the OpenAI-compatible server's /health endpoint; generous start
# period because model loading can take minutes. Assumes the default port
# 8000 — adjust if the port is overridden at run time.
HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \
    CMD curl -fsS http://localhost:8000/health || exit 1

# Default: launch the OpenAI-compatible API server (exec form, PID 1 gets
# signals); CMD carries the overridable default arguments.
# NOTE(review): the container runs as root — access to /dev/vacc0 may
# require it; confirm, and add a non-root USER if the device permits.
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
CMD ["--host", "0.0.0.0", "--port", "8000"]