support metax c500
This commit is contained in:
13
Dockerfile
Normal file
13
Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Base image (per its tag: MACA C500 PyTorch build, torch 2.6, py310, Ubuntu 24.04).
FROM git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64

# Point Hugging Face Hub clients at the hf-mirror.com endpoint.
ENV HF_ENDPOINT=https://hf-mirror.com
# Prepend /opt/conda/bin so its python3/pip are found first.
ENV PATH=/opt/conda/bin:${PATH}

# --no-cache-dir keeps pip's download cache out of the image layer.
# The extra is quoted so the shell never treats the brackets as a glob.
RUN pip install --no-cache-dir transformers==4.50.0 "uvicorn[standard]" fastapi

WORKDIR /app

COPY ./ /app

EXPOSE 8000

# Exec form: python3 is started directly (no intermediate `sh -c`), so it
# receives the signal sent by `docker stop` itself.
CMD ["python3", "server.py"]
|
||||||
35
README.md
Normal file
35
README.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# enginex-metax-c500-translation
|
||||||
|
# translation-transformers
|
||||||
|
## Quickstart
|
||||||
|
```shell
|
||||||
|
#构建docker镜像
|
||||||
|
docker build . -t metax_c500_vl
|
||||||
|
|
||||||
|
#运行docker容器
|
||||||
|
docker run -it -p 10055:8000 --device=/dev/mxcd --device=/dev/dri -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name metax_c500_vl_test metax_c500_vl
|
||||||
|
```
|
||||||
|
等待模型Load完成,出现以下日志时,代表服务启动成功, 且模型加载完成
|
||||||
|
```shell
|
||||||
|
INFO: Application startup complete.
|
||||||
|
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
|
||||||
|
```
|
||||||
|
执行测试程序
|
||||||
|
```shell
|
||||||
|
python3 test.py
|
||||||
|
```
|
||||||
|
测试程序执行结果
|
||||||
|
```
|
||||||
|
Succeed!
|
||||||
|
Response: {'output_text': '这幅图片包含几个元素,共同营造出宁静的氛围。主要对象是一个坐在沙滩上的金毛寻回犬和一个穿着格子衬衫的人。狗似乎正与这个人互动,可能是在玩耍或训练,因为它的爪子和人的手在接触。狗戴着颜色鲜艳的项圈,表明它可能接受过训练或习惯于与人互动。这个人看起来很放松,微笑着,暗示着他们之间的亲密关系。背景是一片宁静的海滩,太阳低垂在地平线上,为场景投射出温暖的金色光线。这可能是一天中的早晨或傍晚,因为光线柔和而扩散。海滩上没有其他人,强调了两个人之间的个人时刻。这张图片唤起了和平、陪伴和简单之美的感觉。'}
|
||||||
|
```
|
||||||
|
停止docker容器
|
||||||
|
```
|
||||||
|
docker stop metax_c500_vl_test
|
||||||
|
```
|
||||||
|
## 模型支持
|
||||||
|
在Quickstart中运行容器时,通过磁盘目录挂载的方式,指定模型的类型和具体的模型名称,即:
|
||||||
|
```
|
||||||
|
-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
|
||||||
|
```
|
||||||
|
目前支持MiniCPM模型, 参考https://modelscope.cn/models/OpenBMB/MiniCPM-V-4
|
||||||
|
|
||||||
12
logger.py
Normal file
12
logger.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Configure the root logger once, at import time. The threshold is taken from
# the LOGLEVEL environment variable (default "INFO"). It is upper-cased because
# logging only recognises upper-case level names, so "debug" would otherwise
# raise ValueError during import.
logging.basicConfig(
    format="%(asctime)s %(name)-12s %(levelname)-4s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
)
|
||||||
|
|
||||||
|
def get_logger(file):
    """Return the logger registered under the name ``file``.

    Args:
        file: Name used to look the logger up.

    Returns:
        The ``logging.Logger`` that ``logging.getLogger`` yields for that
        name; repeated calls with the same name return the same object.
    """
    named_logger = logging.getLogger(file)
    return named_logger
|
||||||
84
server.py
Normal file
84
server.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
import base64
|
||||||
|
import gc
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import uvicorn
|
||||||
|
from typing import List, Optional, Dict, Any, Tuple
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from fastapi import FastAPI, HTTPException, Query
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
|
||||||
|
|
||||||
|
import logger
|
||||||
|
# Module-wide logger, named after this file's path.
log = logger.get_logger(__file__)

# Application object; the startup hook and the /infer route are registered on it.
app = FastAPI()

# Process-wide state assigned by load_model() at application startup.
model_type = None  # `model_type` read from the checkpoint's config
model = None
device = None
tokenizer = None
# load_model() lists `status` in its `global` statement and sets it to
# "success"; initialise it here so it is defined before startup finishes.
status = None
|
||||||
|
|
||||||
|
class GenParams(BaseModel):
    # Generation options nested under "generation" in an /infer request body.
    # Which of them actually reach the model is decided by the handler that
    # receives this object (handle_minicpmv).
    max_new_tokens: int = 128
    temperature: float = 0.0
    top_p: float = 1.0
    # Handed to the model's chat() call as its `sampling` flag.
    do_sample: bool = False
|
||||||
|
|
||||||
|
class InferRequest(BaseModel):
    # JSON body accepted by POST /infer.

    # Text prompt; sent to the model as the content of the user message.
    prompt: str
    # Generation options; the GenParams defaults apply when omitted.
    generation: GenParams = GenParams()
    # One of "auto" | "float16" | "bfloat16" | "float32".
    dtype: str = "auto"
    warmup_runs: int = 1
    measure_token_times: bool = False
    # NOTE(review): dtype, warmup_runs and measure_token_times are accepted,
    # but the /infer handler in this file never reads them.
|
||||||
|
|
||||||
|
@app.on_event("startup")
def load_model():
    """Load the tokenizer and model from ``/model`` when the app starts.

    Reads the checkpoint config to obtain ``model_type``, loads the tokenizer
    and the model (float32 weights), moves the model to ``"cuda"`` and puts it
    in eval mode. Results are stored in the module globals ``model_type``,
    ``tokenizer``, ``model``, ``device`` and ``status``.

    Exceptions raised by the loaders are not caught here.
    """
    global status, device, model_type, model, tokenizer

    log.info("loading model")

    model_path = "/model"
    # trust_remote_code=True runs Python shipped inside the checkpoint
    # directory, so /model must come from a trusted source.
    cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model_type = cfg.model_type
    log.info(f"model type: {model_type}")

    tokenizer = AutoTokenizer.from_pretrained(
        model_path, trust_remote_code=True, use_fast=True
    )

    # `device` was declared global but never assigned; record the device the
    # model is actually moved to.
    device = "cuda"
    model = AutoModel.from_pretrained(
        model_path,
        torch_dtype=torch.float32,
        device_map=None,
        trust_remote_code=True,
    )
    model.to(device)
    model.eval()

    status = "success"
    log.info("model loaded successfully")
|
||||||
|
|
||||||
|
@app.post("/infer")
def infer(req: InferRequest):
    """Run the loaded model on the bundled image with the request's prompt.

    NOTE: the image is always read from ``1.PNG`` relative to the process's
    working directory; the request body carries no image.

    Args:
        req: Parsed request body; only ``prompt`` and ``generation`` are read.

    Returns:
        ``{"output_text": <model response>}``.

    Raises:
        HTTPException: 501 when the loaded checkpoint's ``model_type`` is not
            ``"minicpmv"``, the only type handled here.
    """
    # Reject unsupported checkpoints explicitly. Without this guard the code
    # reached an unbound `text` and the client got an opaque 500.
    if model_type != "minicpmv":
        raise HTTPException(
            status_code=501,
            detail=f"unsupported model type: {model_type}",
        )

    # convert() yields an independent in-memory image, so the underlying file
    # can be closed as soon as the conversion is done.
    with Image.open('1.PNG') as source:
        image = source.convert('RGB')

    text = handle_minicpmv(image, req.prompt, req.generation)
    log.info(f"text={text}")

    return {"output_text": text}
|
||||||
|
|
||||||
|
def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
    """Answer ``prompt`` about ``image`` through the model's own ``chat`` method.

    Args:
        image: Image handed to the model.
        prompt: Text sent as the single user message.
        gen: Generation options from the request.

    Returns:
        Whatever ``model.chat`` returns for a non-streaming call.
    """
    # Message list in the format expected by model.chat.
    msgs = [{"role": "user", "content": prompt}]

    # max_new_tokens and top_p used to be dropped, so the client's limits had
    # no effect; forward them with the other options.
    # NOTE(review): assumes the checkpoint's remote-code chat() accepts
    # max_new_tokens and top_p, as MiniCPM-V's does. Confirm for other models.
    response = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=gen.do_sample,
        max_new_tokens=gen.max_new_tokens,
        temperature=gen.temperature,
        top_p=gen.top_p,
        stream=False,
    )

    return response
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Serve the `app` object of the `server` module on all interfaces,
    # port 8000, with one worker and uvicorn's access log switched off.
    uvicorn.run(
        "server:app",
        host="0.0.0.0",
        port=8000,
        workers=1,
        access_log=False,
    )
|
||||||
|
|
||||||
30
test.py
Normal file
30
test.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
def model_infer(vlm_url: str, payload, timeout: float = 300.0):
    """POST ``payload`` as JSON to ``<vlm_url>/infer`` and print the outcome.

    Args:
        vlm_url: Base URL of the server, without the ``/infer`` suffix.
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would block indefinitely on a hung server.

    Returns:
        None. The result, or the failure reason, is printed.
    """
    try:
        response = requests.post(vlm_url + "/infer", json=payload, timeout=timeout)
        if response.status_code == 200:
            print("Succeed!")
            print("Response:", response.json())
        else:
            print(f"Failed,code: {response.status_code}")
            print("Error detail:", response.text)

    except requests.exceptions.RequestException as e:
        # Covers connection errors and the timeout above.
        print("request error:", str(e))
|
||||||
|
|
||||||
|
# Request body for POST /infer: the prompt plus generation options.
payload = dict(
    prompt="图片有什么?详细描述",
    generation=dict(
        max_new_tokens=64,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    ),
    dtype="auto",
    warmup_runs=0,
    measure_token_times=False,
)

# Base URL of the server under test.
url = "http://127.0.0.1:10055"

model_infer(url, payload)
|
||||||
Reference in New Issue
Block a user