Files
enginex-biren-vllm/Dockerfile

32 lines
762 B
Docker
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Build on top of the official Biren vLLM inference image.
FROM git.modelhub.org.cn:9443/enginex/xc-llm-biren166m:26.01

# Image metadata.
LABEL maintainer="enginex" \
      description="vLLM inference engine for Biren BR166M (SUPA) GPU" \
      version="26.01"

# Working directory for the instructions below and for the running container.
WORKDIR /workspace

# Copy the Biren adaptation code into the image.
COPY vllm_br/ /workspace/vllm_br/

# Optional: uncomment to override the vllm package shipped in the base image.
# COPY vllm/ /workspace/vllm/

# Make vllm_br importable by Python.
# The inherited PYTHONPATH is appended only when it is already set; otherwise
# the value would end in ":" and add an empty search-path entry, which Python
# resolves to the current working directory.
ENV PYTHONPATH="/workspace${PYTHONPATH:+:${PYTHONPATH}}"

# Use the V1 engine by default.
ENV VLLM_USE_V1=1

# Default settings for the SUPA device.
ENV VLLM_BR_WEIGHT_TYPE=NUMA \
    VLLM_BR_QUANT_METHOD=INT8

# Port of the vLLM API service. EXPOSE is documentation only; publish the port
# with `docker run -p` (or the orchestrator's equivalent).
EXPOSE 8000

# Exec form: the server runs as PID 1 and receives signals directly. Arguments
# given to `docker run <image> ...` are appended to this command line.
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]