# syntax=docker/dockerfile:1

# Based on the official Biren vLLM inference image (SUPA stack),
# pinned to the 26.01 release tag for reproducible builds.
FROM git.modelhub.org.cn:9443/enginex/xc-llm-biren166m:26.01

# Image metadata.
LABEL maintainer="enginex" \
      description="vLLM inference engine for Biren BR166M (SUPA) GPU" \
      version="26.01"

# Working directory for the Biren adaptation code.
WORKDIR /workspace

# Copy the Biren (vllm_br) adaptation layer into the image.
COPY vllm_br/ /workspace/vllm_br/

# To override the vllm package shipped in the base image, enable as needed:
# COPY vllm/ /workspace/vllm/

# Make vllm_br discoverable by Python. Kept as its own ENV because it
# appends to the PYTHONPATH inherited from the base image.
ENV PYTHONPATH="/workspace:${PYTHONPATH}"

# Default to the vLLM V1 engine, plus SUPA-device defaults
# (weight placement strategy and quantization method).
ENV VLLM_USE_V1=1 \
    VLLM_BR_WEIGHT_TYPE=NUMA \
    VLLM_BR_QUANT_METHOD=INT8

# vLLM OpenAI-compatible API port (documentation only; publish with -p 8000:8000).
EXPOSE 8000

# NOTE(review): no USER directive — container runs as whatever user the base
# image defines (likely root). GPU device access often requires this; confirm
# whether a non-root user is viable on the BR166M runtime.

# Exec (JSON-array) form so the server runs as PID 1 and receives SIGTERM
# directly from `docker stop`. Extra server flags can be appended at run time.
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]