docs: add README and Dockerfile for Biren BR166M vLLM
This commit is contained in:
31
Dockerfile
Normal file
31
Dockerfile
Normal file
@@ -0,0 +1,31 @@
|
||||
# syntax=docker/dockerfile:1

# Base image: Biren (壁仞) official vLLM inference image for the BR166M GPU.
# Pinned to release tag 26.01 from the private registry.
FROM git.modelhub.org.cn:9443/enginex/xc-llm-biren166m:26.01

# Image metadata (single LABEL instruction; MAINTAINER is deprecated).
LABEL maintainer="enginex" \
      description="vLLM inference engine for Biren BR166M (SUPA) GPU" \
      version="26.01"

# Working directory for the adaptation code (created automatically).
WORKDIR /workspace

# Copy the Biren adaptation layer for vLLM.
COPY vllm_br/ /workspace/vllm_br/

# Optionally override the vllm package shipped in the base image (enable as needed).
# COPY vllm/ /workspace/vllm/

# Make vllm_br discoverable by Python.
# The :+ guard avoids a trailing ":" when PYTHONPATH is unset in the base
# image (an empty PYTHONPATH entry makes Python add the current directory
# to sys.path, which is surprising at runtime).
ENV PYTHONPATH="/workspace${PYTHONPATH:+:${PYTHONPATH}}"

# Default to the vLLM V1 engine.
ENV VLLM_USE_V1=1

# SUPA-device default configuration (overridable at `docker run -e ...`).
ENV VLLM_BR_WEIGHT_TYPE=NUMA \
    VLLM_BR_QUANT_METHOD=INT8

# vLLM OpenAI-compatible API server port. EXPOSE is documentation only;
# publish with `docker run -p 8000:8000`.
EXPOSE 8000

# NOTE(review): the container runs as whatever user the base image defaults
# to (presumably root); confirm whether a non-root USER is compatible with
# SUPA device-node access before tightening this.
# Exec-form ENTRYPOINT so the server is PID 1 and receives SIGTERM directly.
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
|
||||
Reference in New Issue
Block a user