support kunlun r200

This commit is contained in:
aiyueqi
2025-09-22 15:13:55 +08:00
commit b4ef4b9aaa
6 changed files with 181 additions and 0 deletions

BIN
1.PNG Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB

17
Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
# Serving image for Kunlunxin R200 XPU; base image already bundles the
# diffusers/torch stack inside the python38_torch201_cuda conda env.
#FROM git.modelhub.org.cn:980/enginex-kunlunxin/xmlir/r200-8f_xmlir-ubuntu_2004_x86_64:v0.27
FROM diffuser:r200-8f
# Use the hf-mirror endpoint so HuggingFace downloads work from CN networks.
ENV HF_ENDPOINT=https://hf-mirror.com
# Install serving dependencies into the image's bundled conda interpreter
# (system python is NOT used anywhere in this image).
RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install transformers==4.46.3 uvicorn\[standard\] fastapi -i https://pypi.tuna.tsinghua.edu.cn/simple
WORKDIR /app
COPY ./ /app
# Patch transformers' llama rotary-embedding matmul to operate on .clone()'d
# tensors — presumably works around an aliasing issue in the XPU backend of
# this torch build; TODO confirm with the Kunlunxin toolchain notes.
RUN sed -i 's/(inv_freq_expanded\.float() @ position_ids_expanded\.float())/(inv_freq_expanded.float().clone() @ position_ids_expanded.float().clone())/g' /root/miniconda/envs/python38_torch201_cuda/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py
EXPOSE 8000
CMD ["sh", "-c", "/root/miniconda/envs/python38_torch201_cuda/bin/python3 server.py"]

36
README.md Normal file
View File

@@ -0,0 +1,36 @@
# enginex-kunlun-r200-translation
# translation-transformers
## Quickstart
```shell
#构建docker镜像
docker build . -t kunlun_r200_vl
#运行docker容器
docker run -it -p 10055:8000 --device=/dev/xpu2:/dev/xpu0 --device=/dev/xpuctrl:/dev/xpuctrl -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name kunlun_r200_vl_test kunlun_r200_vl
```
等待模型加载完成; 出现以下日志时, 代表服务启动成功且模型加载完成
```shell
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
```
执行测试程序
```shell
python3 test.py
```
测试程序执行结果
```
Succeed!
Response: {'output_text': '图片中的物体是一只狗,具体来说是一只金毛寻回犬。这只狗穿着带有图案的项圈,表明它可能被驯养并经常在户外活动。它正与一个坐着的人互动,看起来像是在玩或进行某种形式的身体接触。地面上有一个物体,可能是狗玩具或零食,这表明狗和人在海滩上进行休闲活动。背景中的海和日落暗示着一个宁静而放松的环境,通常与宠物的陪伴相关联。'}
```
停止docker容器
```
docker stop kunlun_r200_vl_test
```
## 模型支持
在Quickstart中运行容器时通过磁盘目录挂载的方式指定模型的类型和具体的模型名称
```
-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
```
目前支持MiniCPM模型, 参考https://modelscope.cn/models/OpenBMB/MiniCPM-V-4

13
logger.py Normal file
View File

@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
import logging
import os
# Configure the root logger once at import time; the LOGLEVEL environment
# variable overrides the default "INFO" level.
_LOG_FORMAT = "%(asctime)s %(name)-12s %(levelname)-4s %(message)s"
_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(
    format=_LOG_FORMAT,
    datefmt=_DATE_FORMAT,
    level=os.environ.get("LOGLEVEL", "INFO"),
)
def get_logger(file):
    """Return a named logger for *file* (callers pass their __file__)."""
    named_logger = logging.getLogger(file)
    return named_logger

84
server.py Normal file
View File

@@ -0,0 +1,84 @@
import base64
import gc
import io
import os
import time
import uvicorn
from typing import List, Optional, Dict, Any, Tuple
import torch
from PIL import Image
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
import logger
log = logger.get_logger(__file__)
app = FastAPI()
# Global serving state, populated exactly once by load_model() at startup.
model_type = None  # model family string taken from the model's config
model = None  # loaded transformers model, moved to the accelerator
device = None  # NOTE(review): declared global in load_model() but never assigned — appears unused
tokenizer = None  # tokenizer paired with the loaded model
class GenParams(BaseModel):
    """Generation hyper-parameters carried inside an /infer request.

    NOTE(review): only ``do_sample`` and ``temperature`` are actually
    forwarded to the model by handle_minicpmv(); ``max_new_tokens`` and
    ``top_p`` are accepted but currently ignored.
    """
    max_new_tokens: int = 128
    temperature: float = 0.0
    top_p: float = 1.0
    do_sample: bool = False
class InferRequest(BaseModel):
    """Request body for POST /infer.

    NOTE(review): ``dtype``, ``warmup_runs`` and ``measure_token_times`` are
    accepted for API compatibility but never read by the /infer handler.
    """
    prompt: str
    generation: GenParams = GenParams()
    dtype: str = "auto"  # "auto"|"float16"|"bfloat16"|"float32"
    warmup_runs: int = 1
    measure_token_times: bool = False
@app.on_event("startup")
def load_model():
    """Load tokenizer and model from /model into module globals at startup."""
    global status, device, model_type, model, tokenizer
    log.info("loading model")
    model_path = "/model"
    # Read only the config first so we know which model family is mounted.
    cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model_type = cfg.model_type
    log.info(f"model type: {model_type}")
    tokenizer = AutoTokenizer.from_pretrained(
        model_path, trust_remote_code=True, use_fast=True
    )
    # Load in fp32 without a device map, then move to the accelerator in one
    # step. NOTE(review): "cuda" here presumably maps to the Kunlun XPU via
    # the image's torch build — confirm with the base-image docs.
    model = AutoModel.from_pretrained(
        model_path,
        torch_dtype=torch.float32,
        device_map=None,
        trust_remote_code=True,
    )
    model.to("cuda")
    model.eval()
    status = "success"
    log.info(f"model loaded successfully")
@app.post("/infer")
def infer(req: InferRequest):
    """Run VLM inference on the bundled sample image with the request prompt.

    Returns ``{"output_text": <model response>}``. Raises HTTP 400 when the
    mounted model's type is not supported (currently only "minicpmv").
    """
    # NOTE(review): the image is hard-coded to the bundled 1.PNG; the prompt
    # is the only user-controlled input.
    image = Image.open('1.PNG').convert('RGB')
    if model_type == "minicpmv":
        text = handle_minicpmv(image, req.prompt, req.generation)
    else:
        # Bug fix: previously `text` was left undefined for any other model
        # type, so log.info(f"text={text}") raised NameError and the client
        # got an opaque 500. Fail explicitly with a 400 instead.
        raise HTTPException(status_code=400,
                            detail=f"unsupported model type: {model_type}")
    log.info(f"text={text}")
    result = dict()
    result["output_text"] = text
    return result
def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
    """Run one non-streaming chat turn through MiniCPM-V's built-in chat API."""
    # model.chat expects a msgs list of {"role", "content"} dicts.
    conversation = [{"role": "user", "content": prompt}]
    answer = model.chat(
        image=image,
        msgs=conversation,
        tokenizer=tokenizer,
        sampling=gen.do_sample,
        temperature=gen.temperature,
        stream=False,
    )
    return answer
if __name__ == '__main__':
    # Single worker: the model lives in this process, so more workers would
    # each load their own copy. Access log is disabled to reduce noise.
    uvicorn.run("server:app", host="0.0.0.0", port=8000, workers=1, access_log=False)

31
test.py Normal file
View File

@@ -0,0 +1,31 @@
import requests
def model_infer(vlm_url: str, payload):
try:
response = requests.post(vlm_url + "/infer", json=payload)
if response.status_code == 200:
print("Succeed!")
print("Response:", response.json())
else:
print(f"Failedcode: {response.status_code}")
print("Error detail:", response.text)
except requests.exceptions.RequestException as e:
print("request error:", str(e))
# Sample request: ask the model for a detailed description of the bundled
# test image served by the container.
payload = {
    "prompt": "图片有什么?详细描述",
    "generation": {
        "max_new_tokens": 64,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True
    },
    "dtype": "auto",
    "warmup_runs": 0,
    "measure_token_times": False
}
# Host port 10055 maps to container port 8000 (see the docker run command
# in the README).
url = "http://127.0.0.1:10055"
model_infer(url, payload)